From b0e89a263c4d58560d350b392470c38dd510a3d9 Mon Sep 17 00:00:00 2001
From: mykola2312 <49044616+mykola2312@users.noreply.github.com>
Date: Wed, 14 Aug 2024 17:24:34 +0300
Subject: [PATCH] merge Parsable-Instructions into this project for
 integrity. rtdisasm needs lookup tables of instruction opcodes

---
 Makefile                           |    11 +-
 genc.py                            |   245 +
 src/rtdisasm_table.cgz             |   Bin 8247 -> 0 bytes
 xml/LICENSE                        |    22 +
 xml/raw/x86/AMD/3DNow.xml          |   210 +
 xml/raw/x86/AMD/3DNow_Rules.dtd    |    15 +
 xml/raw/x86/AMD/SSE5.xml           |  1119 ++
 xml/raw/x86/AMD/SSE5_Rules.dtd     |    17 +
 xml/raw/x86/AMD/XOP.xml            |   990 +
 xml/raw/x86/AMD/XOP_Rules.dtd      |    15 +
 xml/raw/x86/Intel/AVX512_Rules.dtd |    38 +
 xml/raw/x86/Intel/AVX512_r22.xml   | 25901 +++++++++++++++++++++++++++
 xml/raw/x86/Intel/AVX512_r24.xml   | 25739 ++++++++++++++++++++++++++
 xml/raw/x86/Intel/AZ.xml           | 22780 +++++++++++++++++++++++
 xml/raw/x86/Intel/AZ_Rules.dtd     |    38 +
 15 files changed, 77133 insertions(+), 7 deletions(-)
 create mode 100644 genc.py
 delete mode 100644 src/rtdisasm_table.cgz
 create mode 100644 xml/LICENSE
 create mode 100644 xml/raw/x86/AMD/3DNow.xml
 create mode 100644 xml/raw/x86/AMD/3DNow_Rules.dtd
 create mode 100644 xml/raw/x86/AMD/SSE5.xml
 create mode 100644 xml/raw/x86/AMD/SSE5_Rules.dtd
 create mode 100644 xml/raw/x86/AMD/XOP.xml
 create mode 100644 xml/raw/x86/AMD/XOP_Rules.dtd
 create mode 100644 xml/raw/x86/Intel/AVX512_Rules.dtd
 create mode 100644 xml/raw/x86/Intel/AVX512_r22.xml
 create mode 100644 xml/raw/x86/Intel/AVX512_r24.xml
 create mode 100644 xml/raw/x86/Intel/AZ.xml
 create mode 100644 xml/raw/x86/Intel/AZ_Rules.dtd

diff --git a/Makefile b/Makefile
index 54e74aa..edc036c 100644
--- a/Makefile
+++ b/Makefile
@@ -8,13 +8,13 @@ CC = gcc
 AS = as
 AR = ar
 LD = ld
-GZIP = gzip
+PYTHON = python
 CFLAGS = -Wall -I$(INC_DIR)
 ASFLAGS =
 LDFLAGS = -z noexecstack -lcap
 
 RTDISASM_SRC = rtdisasm.c
-RTDISASM_OBJ := $(addprefix $(OBJ_DIR)/,$(patsubst %.s,%.o,$(patsubst %.c,%.o,$(RTDISASM_SRC)))) $(OBJ_DIR)/rtdisasm_table.o
+RTDISASM_OBJ := $(addprefix $(OBJ_DIR)/,$(patsubst %.s,%.o,$(patsubst %.c,%.o,$(RTDISASM_SRC))))
 RTDISASM_SRC := $(addprefix $(SRC_DIR)/,$(RTDISASM_SRC))
 RTDISASM_DEPS = rtdisasm.h rtdisasm_table.h
 RTDISASM_DEPS := $(addprefix $(INC_DIR)/,$(RTDISASM_DEPS))
@@ -38,15 +38,12 @@ DUMMY_TARGET_SRC := $(addprefix $(SRC_DIR)/,$(DUMMY_TARGET_SRC))
 $(OBJ_DIR)/%.o: $(SRC_DIR)/%.c
 	$(CC) $(CFLAGS) -c -o $@ $<
 
-# compressed C files
-$(OBJ_DIR)/%.o: $(SRC_DIR)/%.cgz
-	$(GZIP) -d -c $< | $(CC) -x c $(CFLAGS) -c -o $@ -
-
 $(OBJ_DIR)/%.o: $(SRC_DIR)/%.s
 	$(AS) $(ASFLAGS) -o $@ $<
 
 rtdisasm: $(RTDISASM_OBJ) $(RTDISASM_DEPS)
-	$(AR) -crs $(BIN_DIR)/librtdisasm.a $(RTDISASM_OBJ)
+	$(PYTHON) genc.py | $(CC) -x c $(CFLAGS) -c -o $(OBJ_DIR)/rtdisasm_table.o -
+	$(AR) -crs $(BIN_DIR)/librtdisasm.a $(RTDISASM_OBJ) $(OBJ_DIR)/rtdisasm_table.o
 
 rtdisasm_test: $(RTDISASM_TEST_OBJ) $(RTDISASM_TEST_DEPS)
 	$(CC) $(LDFLAGS) $(LIB_DIR)/librtdisasm.a -o $(BIN_DIR)/$@ $(RTDISASM_TEST_OBJ)
diff --git a/genc.py b/genc.py
new file mode 100644
index 0000000..caa4304
--- /dev/null
+++ b/genc.py
@@ -0,0 +1,245 @@
+import re
+import xml.etree.ElementTree as ET
+from enum import Enum
+
+class InstructionType(Enum):
+    STANDARD = 0
+    VEX = 1
+    EVEX = 2
+
+    def __str__(self):
+        if self == InstructionType.STANDARD: return "std"
+        elif self == InstructionType.VEX: return "vex"
+        elif self == InstructionType.EVEX: return "evex"
+
+    def value(self):
+        if self == InstructionType.STANDARD: return 0
+        elif self == InstructionType.VEX: return 1
+        elif self == InstructionType.EVEX: return 2
+
+class Instruction:
+    def __init__(self, ins):
+        self._opc = ins.find("opc").text
+        self.x32m = ins.attrib["x32m"]
+        self.x64m = ins.attrib["x64m"]
+        self.mnemonic = ins.find("mnem").text
+
+        self.bytes = None
+
+    def get_type(self):
+        pass
+
+    def has_rex(self):
+        return False
+
+    def has_digit(self):
+        return False
+
+    def has_modrm(self):
+        return False
+
+    def has_imm(self):
+        return False
+
+    def has_value(self):
+        return False
+
+    def has_opreg(self):
+        return False
+
+    def __str__(self):
+        return f"<{self.get_type()}> {self.mnemonic} bytes {self.bytes} rex {self.has_rex()} digit {self.has_digit()} modrm {self.has_modrm()} imm {self.has_imm()} value {self.has_value()} opreg {self.has_opreg()}"
+
+class InstructionCommon:
+    REX_REGEX = re.compile("^REX\\.(.)")
+    BYTES_REGEX = re.compile("([0-9A-F][0-9A-F])")
+    DIGIT_REGEX = re.compile("\\/(\\d)")
+    MODRM_REGEX = re.compile("\\/r")
+    IMM_REGEX = re.compile("i(.)")
+    VALUE_REGEX = re.compile("c(.)")
+    OPREG_REGEX = re.compile("r(.)")
+
+class StandardInstruction(Instruction):
+    def __init__(self, ins):
+        super().__init__(ins)
+
+        rex = InstructionCommon.REX_REGEX.search(self._opc)
+        bytes = InstructionCommon.BYTES_REGEX.findall(self._opc)
+        digit = InstructionCommon.DIGIT_REGEX.search(self._opc)
+        modrm = InstructionCommon.MODRM_REGEX.search(self._opc)
+        imm = InstructionCommon.IMM_REGEX.search(self._opc)
+        value = InstructionCommon.VALUE_REGEX.search(self._opc)
+        opreg = InstructionCommon.OPREG_REGEX.search(self._opc)
+        self.bytes = bytes
+
+        self.rex = None
+        self.digit = None
+        self.modrm = False
+        self.imm = None
+        self.value = None
+        self.opreg = None
+
+        if rex: self.rex = rex.group(1)
+        if digit: self.digit = int(digit.group(1))
+        if modrm: self.modrm = True
+        if imm: self.imm = imm.group(1)
+        if value: self.value = value.group(1)
+        if opreg: self.opreg = opreg.group(1)
+
+    def get_type(self):
+        return InstructionType.STANDARD
+
+    def has_rex(self):
+        return self.rex is not None
+
+    def has_digit(self):
+        return self.digit is not None
+
+    def has_modrm(self):
+        return self.modrm or (self.digit is not None)
+
+    def has_imm(self):
+        return self.imm is not None
+
+    def has_value(self):
+        return self.value is not None
+
+    def has_opreg(self):
+        return self.opreg is not None
+
+class VEXInstruction(Instruction):
+    def __init__(self, ins):
+        super().__init__(ins)
+
+        # fix string because intel employees keep bashing keyboard with random keys
+        self._opc = re.sub(r"\. ", ".", self._opc)
+
+        parts = self._opc.split(" ")
+        (vex, opc) = (parts[0], "".join(parts[1:]))
+        vex_parts = vex.split(".")
+
+        self.lig = False
+        if "128" in vex_parts or "L0" in vex_parts or "LZ" in vex_parts:
+            self.l = 128
+        elif "256" in vex_parts or "L1" in vex_parts:
+            self.l = 256
+        elif "LIG" in vex_parts:
+            self.l = 0
+            self.lig = True
+        else: raise RuntimeError("VEX.L is unknown!")
+
+        self.wig = False
+        if "W0" in vex_parts: self.w = False
+        elif "W1" in vex_parts: self.w = True
+        elif "WIG" in vex_parts:
+            self.wig = True
+            self.w = False
+        else: self.w = False # just default it to False, it's not a big deal
+
+        self.bytes = InstructionCommon.BYTES_REGEX.findall(opc)
+
+        modrm = InstructionCommon.MODRM_REGEX.search(opc)
+        imm = InstructionCommon.IMM_REGEX.search(opc)
+
+        self.modrm = True if modrm else False
+        self.imm = imm.group(1) if imm else None
+
+    def get_type(self):
+        return InstructionType.VEX
+
+    def has_modrm(self):
+        return self.modrm
+
+    def has_imm(self):
+        return self.imm is not None
+
+class EVEXInstruction(Instruction):
+    def __init__(self, ins):
+        super().__init__(ins)
+
+        # fix string because intel employees keep bashing keyboard with random keys
+        self._opc = re.sub(r"\. ", ".", self._opc)
+
+        parts = self._opc.split(" ")
+        (evex, opc) = (parts[0], "".join(parts[1:]))
+        evex_parts = evex.split(".")
+
+        print(evex, opc)
+
+        self.lig = False
+        if "128" in evex_parts: self.l = 128
+        elif "256" in evex_parts: self.l = 256
+        elif "512" in evex_parts: self.l = 512
+        elif "LIG" in evex_parts or "LLIG" in evex_parts:
+            self.l = 0
+            self.lig = True
+        else: raise RuntimeError("EVEX.L and EVEX.LIG is unknown!")
+
+        self.wig = False
+        if "W0" in evex_parts: self.w = False
+        elif "W1" in evex_parts: self.w = True
+        elif "WIG" in evex_parts:
+            self.w = False
+            self.wig = True
+        else: self.w = False
+
+        self.bytes = InstructionCommon.BYTES_REGEX.findall(opc)
+
+        modrm = InstructionCommon.MODRM_REGEX.search(opc)
+        imm = InstructionCommon.IMM_REGEX.search(opc)
+
+        self.modrm = True if modrm else False
+        self.imm = imm.group(1) if imm else None
+
+    def get_type(self):
+        return InstructionType.EVEX
+
+    def has_modrm(self):
+        return self.modrm
+
+    def has_imm(self):
+        return self.imm is not None
+
+def parse_instruction(ins):
+    opc = ins.find("opc").text
+    if "EVEX" in opc: return EVEXInstruction(ins)
+    elif "VEX" in opc: return VEXInstruction(ins)
+    else: return StandardInstruction(ins)
+
+class InstructionGroup:
+    def __init__(self, common):
+        self.brief = common.find("brief").text
+        self.instructions = [parse_instruction(ins) for ins in common.iter("ins")]
+
+def parse_file(path):
+    tree = ET.parse(path)
+    root = tree.getroot()
+
+    groups = [InstructionGroup(common) for common in root.iter("common")]
+    return groups
+
+# TODO: instead of gzipping pipe directly C code into GCC
+# FIXME: instruction_t has no actual rex, imm, value values
+def generate_table(groups):
+    table_len = 0
+    # header
+    print("#include \"rtdisasm_table.h\"\n")
+    print("const instruction_t rtdisasm_table[] = {")
+    # entries
+    for group in groups:
+        for i in group.instructions:
+            opcode = ",".join(["0x{}".format(byte) for byte in i.bytes])
+            opcode_len = len(i.bytes)
+            print("\t{{ .info = {{ .type = {}, .has_rex = {}, .has_digit = {}, .has_modrm = {}, .has_imm = {}, .has_value = {}, .has_opreg = {} }}, .opcode_len = {}, .opcode = {{ {} }} }},".format(
+                i.get_type().value(), int(i.has_rex()), int(i.has_digit()), int(i.has_modrm()), int(i.has_imm()), int(i.has_value()), int(i.has_opreg()), opcode_len, opcode
+            ))
+            table_len += 1
+    # footer
+    print("}};\n\nconst unsigned rtdisasm_table_len = {};".format(table_len))
+
+if __name__ == "__main__":
+    groups = parse_file("xml/raw/x86/Intel/AZ.xml")
+    #groups.extend(parse_file("xml/raw/x86/Intel/AVX512_r22.xml"))
+    #groups.extend(parse_file("xml/raw/x86/Intel/AVX512_r24.xml"))
+
+    generate_table(groups)
diff --git a/src/rtdisasm_table.cgz b/src/rtdisasm_table.cgz
deleted file mode 100644
index b6b89523789f6786ae698bacc05b77dce1ca3737..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 8247
zsDWF#2}^xM!34bsIOGi22u5Yv<~3nA?4RMU7C!FKg2)DwIyq6$xZ#)E+Cd|m(M~SN zP9clevx&KDszu^{-KN3p>H=4gRdVFz*GL(fYjy#BktC%W?<%c&Tf#@e)1p+H2MwQal?bU=2NGm zWj)ClvJ6Wa`*?phOt<=LhH4kY(i2g~8)Z^ezvQTJ`<`4iA_9|2)%yBEVnq7;F&Gcl zS;0bZ=QP;saPh{A=r|6(FT(ny-)@vBS3SN)(CI| zphjiYb$>B`ylP?!ZY3gD};O$*vvi*uMQ zP$&O_9V3p99FtZd0n1>Q^kv~67nyIAR5~xyGnWScr5O#eD1F`KC776iJ`*}Q3p2>I#Qj? zv{ElsDmVR5gerR{4Ir$Bcqr8XO+EOIuHynK9P0SE02zg)=&&cAw*i1*#J4hFJaA)8 zZBQbl$`FZ)c2V1cIS^05PR=NVcSt=wICbp&3*;H zj>2I{vfEGxHq8Oq=xi$aAUjPPe>brrLg<4UxX@gHX+DCQdN!s>WSdu7nLI+@)=tM8 zl-Q!`9ZiVf=Ptf(NRlkq=n~DpvYAHwg~jDY#bdZ^>bLCwUoPB&mjh@2%Ht>NG0{(V z)y+DvE9un>c!2O<`+Q?^866DIJ_!A-_u4L)Da$-xyiO=f_|N}cH-7^e`5@CZxWudw zJ5gWMv`MKQnd|s|w%D^A`VcpCSE+0)Ix^5V(D@H;;X~?a+L3QRKYX0c zoL`?@pfmTtOs2s!h!id13sDV~KbRgbKZeh&Kb?jNq)AljmpMdULb~;vo);+B_8wtQ zp%Zaw)X*+Xii7CDjU{#iH9jKodE=OZUr7s8VJ% ziW6Yv2f4&Gi|fSJh=e_F`|!%%J&$*?Bv}~y4#V*41$aGUtJw1yF6Q~Og60oQZfS}+ z2vgm@S*N6?SPZN|z#R7od3)Vz6d?mQ*`F+7U-Md+9SHZQLY{>jxZ)!~i~|36eI(eb zZGmUghC6rF6wD{vZ{=_RurG_EYA&UzYT!%gtrg7lFX-o&p9{sOYMwK;c3dp1ybj&6 zO~4;9y?wP=z+3$#g5cEeLox_e7vs*&5Da3eA|FYnqGhgSj^HX0DG|}5Jrj#6VbDeT z1DnplIg9Lkm&bGeyM=E_FkiUX-N9v7j+!sFpY3i8My8vaUtC + + + + + + + FEMMS + + FEMMS + void + 0F 0E + Faster Enter/Exit of the MMX or floating-point state. + + + + PAVGUSB + + PAVGUSB + mmreg1,mmreg2/mem64 + 0F 0F /r BF + Average of unsigned packed 8-bit values. + + + + PF2ID + + PF2ID + mmreg1,mmreg2/mem64 + 0F 0F /r 1D + Converts packed floating-point operand to packed 32-bit integer. + + + + PFACC + + PFACC + mmreg1,mmreg2/mem64 + 0F 0F /r AE + Floating-point accumulate. + + + + PFADD + + PFADD + mmreg1,mmreg2/mem64 + 0F 0F /r 9E + Packed, floating-point addition. + + + + PFCMPEQ + + PFCMPEQ + mmreg1,mmreg2/mem64 + 0F 0F /r B0 + Packed floating-point comparison, equal to. + + + + PFCMPGE + + PFCMPGE + mmreg1,mmreg2/mem64 + 0F 0F /r 90 + Packed floating-point comparison, greater than or equal to. + + + + PFCMPGT + + PFCMPGT + mmreg1,mmreg2/mem64 + 0F 0F /r A0 + Packed floating-point comparison, greater than. + + + + PFMAX + + PFMAX + mmreg1,mmreg2/mem64 + 0F 0F /r A4 + Packed floating-point maximum. + + + + PFMIN + + PFMIN + mmreg1,mmreg2/mem64 + 0F 0F /r 94 + Packed floating-point minimum. + + + + PFMUL + + PFMUL + mmreg1,mmreg2/mem64 + 0F 0F /r B4 + Packed floating-point multiplication. + + + + PFRCP + + PFRCP + mmreg1,mmreg2/mem64 + 0F 0F /r 96 + Floating-point reciprocal approximation. + + + + PFRCPIT1 + + PFRCPIT1 + mmreg1,mmreg2/mem64 + 0F 0F /r A6 + Packed floating-point reciprocal, first iteration step. + + + + PFRCPIT2 + + PFRCPIT2 + mmreg1,mmreg2/mem64 + 0F 0F /r B6 + Packed floating-point reciprocal/reciprocal square root, second iteration step. + + + + PFRSQIT1 + + PFRSQIT1 + mmreg1,mmreg2/mem64 + 0F 0F /r A7 + Packed floating-point reciprocal square root, first iteration step. + + + + PFRSQRT + + PFRSQRT + mmreg1,mmreg2/mem64 + 0F 0F /r 97 + Floating-point reciprocal square root approximation. + + + + PFSUB + + PFSUB + mmreg1,mmreg2/mem64 + 0F 0F /r 9A + Packed floating-point subtraction. + + + + PFSUBR + + PFSUBR + mmreg1,mmreg2/mem64 + 0F 0F /r AA + Packed floating-point reverse subtraction. + + + + PI2FD + + PI2FD + mmreg1,mmreg2/mem64 + 0F 0F /r 0D + Packed 32-bit integer to floating-point conversion. + + + + PREFETCHW + + PREFETCHW + mem8 + 0F 0D + Prefetch processor cache line into L1 data cache (Dcache). 
+ + + \ No newline at end of file diff --git a/xml/raw/x86/AMD/3DNow_Rules.dtd b/xml/raw/x86/AMD/3DNow_Rules.dtd new file mode 100644 index 0000000..f6352cc --- /dev/null +++ b/xml/raw/x86/AMD/3DNow_Rules.dtd @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/xml/raw/x86/AMD/SSE5.xml b/xml/raw/x86/AMD/SSE5.xml new file mode 100644 index 0000000..6cf06dc --- /dev/null +++ b/xml/raw/x86/AMD/SSE5.xml @@ -0,0 +1,1119 @@ + + + + + + + + COMPD--Compare Vector Double-Precision Floating-Point. + Compares two packed double-precision floating-point values in XMM2 register by XMM3 register or 128-bit memory location and writes 64 bits of all 1s (TRUE) or all 0s (FALSE) in the destination (XMM1 register). + + COMPD + xmm1,xmm2,xmm3/mem128,imm8 + 0F 25 2D /r /drex0 ib + + + + COMPS--Compare Vector Single-Precision Floating-Point. + Compares four packed single-precision floating-point values in XMM2 register by XMM3 register or 128-bit memory location and writes 32 bits of all 1s (TRUE) or all 0s (FALSE) in the destination (XMM1 register). + + COMPS + xmm1,xmm2,xmm3/mem128,imm8 + 0F 25 2C /r /drex0 ib + + + + COMSD--Compare Scalar Double-Precision Floating-Point. + Compares the low-order double-precision floating-point value in XMM2 register by the low-order double-precision floating-point value in XMM3 register or 64-bit memory location and writes 64 bits of all 1s (TRUE) or all 0s (FALSE) in the low-order quadword in the destination (XMM1 register). + + COMSD + xmm1,xmm2,xmm3/mem64,imm8 + 0F 25 2F /r /drex0 ib + + + + COMSS--Compare Scalar Single-Precision Floating-Point. + Compares the low-order single-precision floating-point value in XMM2 register by the low-order single-precision floating-point value in XMM3 register or 32-bit memory location and writes 32 bits of all 1s (TRUE) or all 0s (FALSE) in the low-order doubleword in the destination (XMM1 register). + + COMSS + xmm1,xmm2,xmm3/mem32,imm8 + 0F 25 2E /r /drex0 ib + + + + CVTPH2PS--Convert 16-Bit Floating-Point to Single-Precision Floating-Point. + Converts four packed 16-bit floating-point values in the low 64 bits of XMM2 or 64-bit memory location to four single-precision floating-point values and writes the results in the destination (XMM1 register). + + CVTPH2PS + xmm1,xmm2/mem64 + 0F 7A 30 /r + + + + CVTPS2PH--Convert Single-Precision Floating-Point to 16-Bit Floating-Point. + Converts four packed single-precision floating-point values in XMM2 to four 16-bit floating-point values and writes the results in the destination (XMM1 register or memory location). + + CVTPS2PH + xmm1/mem64,xmm2 + 0F 7A 31 /r + + + + FMADDPD--Multiply and Add Packed Double-Precision Floating-Point. + Multiplies two packed double-precision floating-point values in the second and third operands, then adds the products to the fourth operand and writes the results in the destination (first operand). + + FMADDPD + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 01 /r /drex0 + + + FMADDPD + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 01 /r /drex1 + + + FMADDPD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 05 /r /drex0 + + + FMADDPD + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 05 /r /drex1 + + + + FMADDPS--Multiply and Add Packed Single-Precision Floating-Point. + Multiplies four packed single-precision floating-point values in the second and third operands, then adds the products to the fourth operand and writes the results in the destination (XMM1 register). 
+ + FMADDPS + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 00 /r /drex0 + + + FMADDPS + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 00 /r /drex1 + + + FMADDPS + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 04 /r /drex0 + + + FMADDPS + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 04 /r /drex1 + + + + FMADDSD--Multiply and Accumulate Scalar Double-Precision Floating-Point. + Multiplies double-precision floating-point value in the loworder quadword of the second and third operands, then adds the product to the double-precision floating-point value in the loworder quadword of the fourth operand and writes the result in the low order quadword of the destination (XMM1 register). + + FMADDSD + xmm1,xmm1,xmm2,xmm3/mem64 + 0F 24 03 /r /drex0 + + + FMADDSD + xmm1,xmm1,xmm3/mem64,xmm2 + 0F 24 03 /r /drex1 + + + FMADDSD + xmm1,xmm2,xmm3/mem64,xmm1 + 0F 24 07 /r /drex0 + + + FMADDSD + xmm1,xmm3/mem64,xmm2,xmm1 + 0F 24 07 /r /drex1 + + + + FMADDSS--Multiply and Add Scalar Single-Precision Floating-Point. + Multiplies packed single-precision floating-point values in low-order doubleword of the second and third operands, then adds the product to low-order doubleword of the fourth operand and writes the result in the low-order doubleword of the destination (XMM1 register). + + FMADDSS + xmm1,xmm1,xmm2,xmm3/mem32 + 0F 24 02 /r /drex0 + + + FMADDSS + xmm1,xmm1,xmm3/mem32,xmm2 + 0F 24 02 /r /drex1 + + + FMADDSS + xmm1,xmm2,xmm3/mem32,xmm1 + 0F 24 06 /r /drex0 + + + FMADDSS + xmm1,xmm3/mem32,xmm2,xmm1 + 0F 24 06 /r /drex1 + + + + FMSUBPD--Multiply and Subtract Packed Double-Precision Floating-Point. + Multiplies two packed double-precision floating-point values in the second and third operands, then subtracts the corresponding two packed double-precision floating-point values in the fourth operand from the products and writes the quadword results in the destination (XMM1 register). + + FMSUBPD + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 09 /r /drex0 + + + FMSUBPD + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 09 /r /drex1 + + + FMSUBPD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 0D /r /drex0 + + + FMSUBPD + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 0D /r /drex1 + + + + FMSUBPS--Multiply and Subtract Packed Single-Precision Floating-Point. + Multiplies four packed single-precision floating-point values in the first and second source operands, then subtracts the corresponding four packed single-precision floating-point values in the third operand from the products and writes the doubleword results in the destination (XMM1 register). + + FMSUBPS + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 08 /r /drex0 + + + FMSUBPS + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 08 /r /drex1 + + + FMSUBPS + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 0C /r /drex0 + + + FMSUBPS + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 0C /r /drex1 + + + + FMSUBSD--Multiply and Subtract Scalar Double-Precision Floating-Point. + Multiplies double-precision floating-point value in the loworder quadword of the second and third operands, then subtracts the double-precision floating-point values in the fourth operand from the product and writes the result in the low order quadword of the destination (XMM1 register). + + FMSUBSD + xmm1,xmm1,xmm2,xmm3/mem64 + 0F 24 0B /r /drex0 + + + FMSUBSD + xmm1,xmm1,xmm3/mem64,xmm2 + 0F 24 0B /r /drex1 + + + FMSUBSD + xmm1,xmm2,xmm3/mem64,xmm1 + 0F 24 0F /r /drex0 + + + FMSUBSD + xmm1,xmm3/mem64,xmm2,xmm1 + 0F 24 0F /r /drex1 + + + + FMSUBSS--Multiply and Subtract Scalar Single-Precision Floating-Point. 
+ Multiplies single-precision floating-point value in the loworder doubleword of the second and third operands, then subtracts the single-precision floating-point values in the low-order doubleword of the fourth operand from the product and writes the result in the low-order doubleword of the destination (XMM1 register). + + FMSUBSS + xmm1,xmm1,xmm2,xmm3/mem32 + 0F 24 0A /r /drex0 + + + FMSUBSS + xmm1,xmm1,xmm3/mem32,xmm2 + 0F 24 0A /r /drex1 + + + FMSUBSS + xmm1,xmm2,xmm3/mem32,xmm1 + 0F 24 0E /r /drex0 + + + FMSUBSS + xmm1,xmm3/mem32,xmm2,xmm1 + 0F 24 0E /r /drex1 + + + + FNMADDPD--Negative Multiply and Add Packed Double-Precision Floating-Point. + Multiplies two packed double-precision floating-point values in the second and third operands, then negates the products and adds them to the fourth operand and writes the results in the destination (XMM1 register). + + FNMADDPD + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 11 /r /drex0 + + + FNMADDPD + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 11 /r /drex1 + + + FNMADDPD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 15 /r /drex0 + + + FNMADDPD + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 15 /r /drex1 + + + + FNMADDPS--Negative Multiply and Add Packed Single-Precision Floating-Point. + Multiplies four packed single-precision floating-point values in the second and third operands, then negates the products and adds them to the fourth operand and writes the results in the destination (XMM1 register). + + FNMADDPS + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 10 /r /drex0 + + + FNMADDPS + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 10 /r /drex1 + + + FNMADDPS + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 14 /r /drex0 + + + FNMADDPS + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 14 /r /drex1 + + + + FNMADDSD--Negate Multiply and Add Scalar Double-Precision Floating-Point. + Multiplies double-precision floating-point value in the loworder quadword of the second and third operands, then negates the product and adds it to the double-precision floating-point value in the loworder quadword of the fourth operand and writes the result in the low order quadword of the destination (XMM1 register). + + FNMADDSD + xmm1,xmm1,xmm2,xmm3/mem64 + 0F 24 13 /r /drex0 + + + FNMADDSD + xmm1,xmm1,xmm3/mem64,xmm2 + 0F 24 13 /r /drex1 + + + FNMADDSD + xmm1,xmm2,xmm3/mem64,xmm1 + 0F 24 17 /r /drex0 + + + FNMADDSD + xmm1,xmm3/mem64,xmm2,xmm1 + 0F 24 17 /r /drex1 + + + + FNMADDSS--Negative Multiply and Add Scalar Single-Precision Floating-Point. + Multiplies single-precision floating-point values in loworder doubleword of the second and third operands, then negates the product and adds it to low-order doubleword of fourth operand and writes the result in the loworder doubleword of the destination (XMM1 register). + + FNMADDSS + xmm1,xmm1,xmm2,xmm3/mem32 + 0F 24 12 /r /drex0 + + + FNMADDSS + xmm1,xmm1,xmm3/mem32,xmm2 + 0F 24 12 /r /drex1 + + + FNMADDSS + xmm1,xmm2,xmm3/mem32,xmm1 + 0F 24 16 /r /drex0 + + + FNMADDSS + xmm1,xmm3/mem32,xmm2,xmm1 + 0F 24 16 /r /drex1 + + + + FNMSUBPD--Negative Multiply and Subtract Packed Double-Precision Floating-Point. + Multiplies two packed double-precision floating-point values in the second and third operands, then subtracts the corresponding two packed double-precision floatingpoint values in the fourth operand from the negated products and writes the quadword results in the destination (XMM1 register). 
+ + FNMSUBPD + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 19 /r /drex0 + + + FNMSUBPD + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 19 /r /drex1 + + + FNMSUBPD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 1D /r /drex0 + + + FNMSUBPD + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 1D /r /drex1 + + + + FNMSUBPS--Negative Multiply and Subtract Packed Single-Precision Floating-Point. + Multiplies four packed single-precision floating-point values in the second and third operands, then subtracts the corresponding four packed single-precision floating-point values in the fourth operand from the negated products and writes the doubleword results in the destination (XMM1 register). + + FNMSUBPS + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 18 /r /drex0 + + + FNMSUBPS + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 18 /r /drex1 + + + FNMSUBPS + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 1C /r /drex0 + + + FNMSUBPS + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 1C /r /drex1 + + + + FNMSUBSD--Negative Multiply and Subtract Scalar Double-Precision Floating-Point. + Multiplies double-precision floating-point value in the loworder quadword of the second and third operands, then subtracts the double-precision floating-point values in the fourth operand from the negated product and writes the result in the low order quadword of the destination (XMM1 register). + + FNMSUBSD + xmm1,xmm1,xmm2,xmm3/mem64 + 0F 24 1B /r /drex0 + + + FNMSUBSD + xmm1,xmm1,xmm3/mem64,xmm2 + 0F 24 1B /r /drex1 + + + FNMSUBSD + xmm1,xmm2,xmm3/mem64,xmm1 + 0F 24 1F /r /drex0 + + + FNMSUBSD + xmm1,xmm3/mem64,xmm2,xmm1 + 0F 24 1F /r /drex1 + + + + FNMSUBSS--Negative Multiply and Subtract Scalar Single-Precision Floating-Point. + Multiplies single-precision floating-point value in the loworder doubleword of the second and third operands, then subtracts the single-precision floating-point values in the loworder doubleword of the fourth operand from the negated product and writes the result in the low-order doubleword of the destination (XMM1 register). + + FNMSUBSS + xmm1,xmm1,xmm2,xmm3/mem32 + 0F 24 1A /r /drex0 + + + FNMSUBSS + xmm1,xmm1,xmm3/mem32,xmm2 + 0F 24 1A /r /drex1 + + + FNMSUBSS + xmm1,xmm2,xmm3/mem32,xmm1 + 0F 24 1E /r /drex0 + + + FNMSUBSS + xmm1,xmm3/mem32,xmm2,xmm1 + 0F 24 1E /r /drex1 + + + + FRCZPD--Extract Fraction Packed Double-Precision Floating-Point. + Extracts the fractional portion of each of two packed double-precision floating-point values in XMM2 register or 128-bit memory location and writes quadword results in the destination (XMM1 register). + + FRCZPD + xmm1,xmm2/mem128 + 0F 7A 11 /r + + + + FRCZPS--Extract Fraction Packed Single-Precision Floating-Point. + Extracts the fractional portion of each of four packed single-precision floating-point values in XMM2 register or 128-bit memory location and writes corresponding doubleword results in the destination (XMM1 register). + + FRCZPS + xmm1,xmm2/mem128 + 0F 7A 10 /r + + + + FRCZSD--Extract Fraction Scalar Double-Precision Floating-Point. + Extracts the fractional portion of the double-precision floating-point value in the low-order quadword of the XMM2 register or 64-bit memory location and writes the result in the low-order quadword of the destination (XMM1 register). + + FRCZSD + xmm1,xmm2/mem64 + 0F 7A 13 /r + + + + FRCZSS--Extract Fraction Scalar Single-Precision Floating Point. + Extracts the fractional portion of the single-precision floating-point value in the low-order doubleword of the XMM2 register or 32-bit memory location and writes the result in the low-order doubleword of the destination (XMM1 register). 
+ + FRCZSS + xmm1,xmm2/mem32 + 0F 7A 12 /r + + + + PCMOV--Vector Conditional Moves. + For each bit position of the 128 bit field, moves the bit value from the second source operand to the destination (xmm1 register) when the associated bit in the fourth source operand =1; otherwise, moves bit value from the third source operand to the destination. + + PCMOV + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 22 /r /drex0 + + + PCMOV + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 22 /r /drex1 + + + PCMOV + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 26 /r /drex0 + + + PCMOV + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 26 /r /drex1 + + + + PCOMB--Compare Vector Signed Bytes. + Compares signed bytes in XMM2 register with corresponding byte in XMM3 register or 128-bit memory location and writes 8 bits of all 1s (TRUE) or all 0s (FALSE) in the corresponding byte in the destination (XMM1 register). + + PCOMB + xmm1,xmm2,xmm3/mem128,imm8 + 0F 25 4C /r /drex0 ib + + + + PCOMD--Compare Vector Signed Doublewords. + Compares signed doublewords in XMM2 register with corresponding doubleword in XMM3 register or 128-bit memory location and writes 32 bits of all 1s (TRUE) or all 0s (FALSE) in the corresponding doubleword in the destination (XMM1 register). + + PCOMD + xmm1,xmm2,xmm3/mem128,imm8 + 0F 25 4E /r /drex0 ib + + + + PCOMQ--Compare Vector Signed Quadwords. + Compares signed quadwords in XMM2 register with corresponding quadword in XMM3 register or 128-bit memory location and writes 64 bits of all 1s (TRUE) or all 0s (FALSE) in the corresponding quadword in the destination (XMM1 register). + + PCOMQ + xmm1,xmm2,xmm3/mem128,imm8 + 0F 25 4F /r /drex0 ib + + + + PCOMUB--Compare Vector Unsigned Bytes. + Compares unsigned bytes in XMM2 register with corresponding byte in XMM3 register or 128-bit memory location and writes 8 bits of all 1s (TRUE) or all 0s (FALSE) in the corresponding byte in the destination (XMM1 register). + + PCOMUB + xmm1,xmm2,xmm3/mem128,imm8 + 0F 25 6C /r /drex0 ib + + + + PCOMUD--Compare Vector Unsigned Doublewords. + Compares unsigned doublewords in XMM2 register with corresponding doubleword in XMM3 register or 128-bit memory location and writes 32 bits of all 1s (TRUE) or all 0s (FALSE) in the corresponding doubleword in the destination (XMM1 register). + + PCOMUD + xmm1,xmm2,xmm3/mem128,imm8 + 0F 25 6E /r /drex0 ib + + + + PCOMUQ--Compare Vector Unsigned Quadwords. + Compares unsigned quadwords in XMM2 register with corresponding quadword in XMM3 register or 128-bit memory location and writes 64 bits of all 1s (TRUE) or all 0s (FALSE) in the corresponding quadword in the destination (XMM1 register). + + PCOMUQ + xmm1,xmm2,xmm3/mem128,imm8 + 0F 25 6F /r /drex0 ib + + + + PCOMUW--Compare Vector Unsigned Words. + Compares unsigned words in XMM2 register with corresponding word in XMM3 register or 128-bit memory location and writes 16 bits of all 1s (TRUE) or all 0s (FALSE) in the corresponding word in the destination (XMM1 register). + + PCOMUW + xmm1,xmm2,xmm3/mem128,imm8 + 0F 25 6D /r /drex0 ib + + + + PCOMW--Compare Vector Signed Words. + Compares signed words in XMM2 register with corresponding word in XMM3 register or 128-bit memory location and writes 16 bits of all 1s (TRUE) or all 0s (FALSE) in the corresponding word in the destination (XMM1 register). + + PCOMW + xmm1,xmm2,xmm3/mem128,imm8 + 0F 25 4D /r /drex0 ib + + + + PERMPD--Permute Double-Precision Floating-Point. 
+ For each double-precision result, uses corresponding control byte in the fourth operand to perform an operation on one of 4 double-precision operands from the second and third source operands and writes result in destination (xmm1 register). + + PERMPD + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 21 /r /drex0 + + + PERMPD + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 21 /r /drex1 + + + PERMPD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 25 /r /drex0 + + + PERMPD + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 25 /r /drex1 + + + + PERMPS--Permute and Modify Single-Precision Floating Point. + For each single-precision result, uses corresponding control byte in the fourth operand to perform an operation on one of 8 single-precision operands from the second and third source operands and writes result in destination (xmm1 register). + + PERMPS + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 20 /r /drex0 + + + PERMPS + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 20 /r /drex1 + + + PERMPS + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 24 /r /drex0 + + + PERMPS + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 24 /r /drex1 + + + + PHADDBD--Packed Horizontal Add Signed Byte to Signed Doubleword. + Adds four successive 8-bit signed integer values in an XMM register or 128-bit memory location and packs the 32-bit results in the destination XMM register. + + PHADDBD + xmm1,xmm2/mem128 + 0F 7A 42 /r + + + + PHADDBQ--Packed Horizontal Add Signed Byte to Signed Quadword. + Adds eight successive 8-bit signed integer values in an XMM register or 128-bit memory location and packs the 32-bit results in the destination XMM register. + + PHADDBQ + xmm1,xmm2/mem128 + 0F 7A 43 /r + + + + PHADDBW--Packed Horizontal Add Signed Byte to Signed Word. + Adds each adjacent pair of 8-bit signed integer values in an XMM register or 128-bit memory location and packs the 16-bit results in the destination XMM register. + + PHADDBW + xmm1,xmm2/mem128 + 0F 7A 41 /r + + + + PHADDDQ--Packed Horizontal Add Signed Doubleword to Signed Quadword. + Adds each adjacent pair of 32-bit signed integer values in an XMM register or 128-bit memory location and packs the 64-bit results in the destination XMM register. + + PHADDDQ + xmm1,xmm2/mem128 + 0F 7A 4B /r + + + + PHADDUBD--Packed Horizontal Add Unsigned Byte to Doubleword. + Adds four successive 8-bit unsigned integer values in an XMM register or 128-bit memory location and packs the 32-bit results in the destination XMM register. + + PHADDUBD + xmm1,xmm2/mem128 + 0F 7A 52 /r + + + + PHADDUBQ--Packed Horizontal Add Unsigned Byte to Quadword. + Adds eight successive 8-bit unsigned integer values in an XMM register or 128-bit memory location and packs the 64-bit results in the destination XMM register. + + PHADDUBQ + xmm1,xmm2/mem128 + 0F 7A 53 /r + + + + PHADDUBW--Packed Horizontal Add Unsigned Byte to Word. + Adds each adjacent pair of 8-bit unsigned integer values in an XMM register or 128-bit memory location and packs the 16-bit results in the destination XMM register. + + PHADDUBW + xmm1,xmm2/mem128 + 0F 7A 51 /r + + + + PHADDUDQ--Packed Horizontal Add Unsigned Doubleword to Quadword. + Adds each adjacent pair of 32-bit unsigned integer values in an XMM register or 128-bit memory location and packs the 64-bit results in the destination XMM register. + + PHADDUDQ + xmm1,xmm2/mem128 + 0F 7A 5B /r + + + + PHADDUWD--Packed Horizontal Add Unsigned Word to Doubleword. + Adds each adjacent pair of 16-bit unsigned integer values in an XMM register or 128-bit memory location and packs the 32-bit results in the destination XMM register. 
+ + PHADDUWD + xmm1,xmm2/mem128 + 0F 7A 56 /r + + + + PHADDUWQ--Packed Horizontal Add Unsigned Word to Quadword. + Adds four successive 16-bit unsigned integer values in an XMM register or 128-bit memory location and packs the 64-bit results in the destination XMM register. + + PHADDUWQ + xmm1,xmm2/mem128 + 0F 7A 57 /r + + + + PHADDWD--Packed Horizontal Add Signed Word to Signed Doubleword. + Adds each adjacent pair of 16-bit signed integer values in an XMM register or 128-bit memory location and packs the 32-bit results in the destination XMM register. + + PHADDWD + xmm1,xmm2/mem128 + 0F 7A 46 /r + + + + PHADDWQ--Packed Horizontal Add Signed Word to Signed Quadword. + Adds four successive 16-bit signed integer values in an XMM register or 128-bit memory location and packs the 64-bit results in the destination XMM register. + + PHADDWQ + xmm1,xmm2/mem128 + 0F 7A 47 /r + + + + PHSUBBW--Packed Horizontal Subtract Signed Byte to Signed Word. + Subtracts the most significant byte from the least significant byte of each word in an XMM register or 128-bit memory location and packs the 16-bit results in the destination XMM register. + + PHSUBBW + xmm1,xmm2/mem128 + 0F 7A 61 /r + + + + PHSUBDQ--Packed Horizontal Subtract Signed Doubleword to Signed Quadword. + Subtracts the most significant doubleword from the least significant doubleword of each quadword in an XMM register or 128-bit memory location and packs the 64-bit results in the destination XMM register. + + PHSUBDQ + xmm1,xmm2/mem128 + 0F 7A 63 /r + + + + PHSUBWD--Packed Horizontal Subtract Signed Word to Signed Doubleword. + Subtracts the most significant word from the least significant word of each adjacent pair of 16-bit signed integer values in an XMM register or 128bit memory location and packs the 32-bit results in the destination XMM register. + + PHSUBWD + xmm1,xmm2/mem128 + 0F 7A 62 /r + + + + PMACSDD--Packed Multiply Accumulate Signed Doubleword to Signed Doubleword. + Multiplies each packed 32-bit signed integer values in the second and third operands, then adds the 64-bit product to the corresponding packed 32-bit signed integer value in the fourth operand and writes the signed 32-bit result in the corresponding doubleword of the destination (xmm1 register). + + PMACSDD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 9E /r /drex0 + + + + PMACSDQH--Packed Multiply Accumulate Signed High Doubleword to Signed Quadword. + Multiplies the high doublewords in the second and third operand, then adds the signed 64-bit products to the signed 64-bit values in the fourth operand and writes the quadword results in the destination (xmm1 register). + + PMACSDQH + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 9F /r /drex0 + + + + PMACSDQL--Packed Multiply Accumulate Signed Low Doubleword to Signed Quadword. + Multiplies the low doublewords in the second and third operands, then adds the signed 64-bit products to the signed 64-bit values in the fourth operand and writes the signed quadword results in the destination (xmm1 register). + + PMACSDQL + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 97 /r /drex0 + + + + PMACSSDD--Packed Multiply Accumulate Signed Doubleword to Signed Doubleword with Saturation. + Multiplies each packed 32-bit signed integer values in the second and third operands, then adds each 64-bit product to the corresponding packed 32-bit signed integer value in the fourth operand and writes the signed saturated 32-bit result in the corresponding doubleword of the destination (xmm1 register). 
+ + PMACSSDD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 8E /r /drex0 + + + + PMACSSDQH--Packed Multiply Accumulate Signed High Doubleword to Signed Quadword with Saturation. + Multiplies the high doublewords in the second and third operands, then adds the signed products to the signed 64-bit integer values in the fourth operand. + + PMACSSDQH + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 8F /r /drex0 + + + + PMACSSDQL--Packed Multiply Accumulate Signed Low Doubleword to Signed Quadword with Saturation. + Multiplies the low doublewords in the second and third operands, then adds the 64-bit products to the signed 64-bit integer values in the fourth operand and writes the signed saturated quadword result in the destination (xmm1 register). + + PMACSSDQL + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 87 /r /drex0 + + + + PMACSSWD--Packed Multiply Accumulate Signed Word to Signed Doubleword with Saturation. + Multiplies each odd-numbered packed 16-bit signed integer values in the second and third operands, then adds the 32-bit products to the corresponding packed 32-bit signed integer values in the fourth operand and writes the signed saturated 32-bit results in the destination (xmm1 register). + + PMACSSWD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 86 /r /drex0 + + + + PMACSSWW--Packed Multiply Accumulate Signed Word to Signed Word with Saturation. + Multiplies packed 16-bit signed integer values in the second and third operands, then adds the 32-bit products to the corresponding packed 16-bit signed integer value in the fourth operand and writes the signed saturated 16-bit results in the destination (xmm1 register). + + PMACSSWW + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 85 /r /drex0 + + + + PMACSWD--Packed Multiply Accumulate Signed Word to Signed Doubleword. + Multiplies each odd-numbered packed 16-bit signed integer values in second and third operands, then adds each 32bit product to the corresponding packed 32-bit signed integer value in the fourth operand and writes the signed 32-bit result in the destination (xmm1 register). + + PMACSWD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 96 /r /drex0 + + + + PMACSWW--Packed Multiply Accumulate Signed Word to Signed Word. + Multiplies packed 16-bit signed integer values in the second and third operands, adds each 32-bit product to the corresponding packed 16-bit signed integer value in the fourth operand and writes the signed 16-bit results in the destination (xmm1 register). + + PMACSWW + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 95 /r /drex0 + + + + PMADCSSWD--Packed Multiply, Add and Accumulate Signed Word to Signed Doubleword with Saturation. + Multiplies packed signed 16bit integer values in the second and third operands, then adds the 32-bit products of the even-odd adjacent words together. Finally, adds their sum to the corresponding packed 32-bit signed integer value in the fourth operand and writes the signed saturated 32-bit results in the destination (xmm1 register). + + PMADCSSWD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 A6 /r /drex0 + + + + PMADCSWD--Packed Multiply Add and Accumulate Signed Word to Signed Doubleword. + Multiplies packed signed 16bit integer values in the second and third operands, then adds the 32-bit products of the even-odd adjacent words together. Finally, adds their sum to the corresponding packed 32-bit signed integer value in the fourth operand and writes the signed 32-bit results in the destination (xmm1register). + + PMADCSWD + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 B6 /r /drex0 + + + + PPERM--Packed Permute Bytes. 
+ For each byte position of the 16byte result, uses corresponding control byte in fourth operand to perform logical operation on one of 32 bytes from the second and third source operands and writes result in destination (xmm1 register). + + PPERM + xmm1,xmm1,xmm2,xmm3/mem128 + 0F 24 23 /r /drex0 + + + PPERM + xmm1,xmm1,xmm3/mem128,xmm2 + 0F 24 23 /r /drex1 + + + PPERM + xmm1,xmm2,xmm3/mem128,xmm1 + 0F 24 27 /r /drex0 + + + PPERM + xmm1,xmm3/mem128,xmm2,xmm1 + 0F 24 27 /r /drex1 + + + + PROTB--Packed Rotate Bytes. + Rotates each byte of the source operand (2nd operand) by the amount specified in the signed value of the corresponding count byte (3rd operand) and writes the result in the corresponding byte of the destination. + + PROTB + xmm1,xmm2,xmm3/mem128 + 0F 24 40 /r /drex0 + + + PROTB + xmm1,xmm3/mem128,xmm2 + 0F 24 40 /r /drex1 + + + + PROTD--Packed Rotate Doublewords. + Rotates each doubleword of the source operand (2nd operand) by the amount specified in the low-order byte of the corresponding count doubleword (3rd operand) and writes the result in the corresponding doubleword of the destination. + + PROTD + xmm1,xmm2,xmm3/mem128 + 0F 24 42 /r /drex0 + + + PROTD + xmm1,xmm3/mem128,xmm2 + 0F 24 42 /r /drex1 + + + + PROTQ--Packed Rotate Quadwords. + Rotates each quadword of the source operand (2nd operand) by the amount specified in the low-order byte of the corresponding quadword in the third operand and writes the result in the corresponding quadword of the destination. + + PROTQ + xmm1,xmm2,xmm3/mem128 + 0F 24 43 /r /drex0 + + + PROTQ + xmm1,xmm3/mem128,xmm2 + 0F 24 43 /r /drex1 + + + + PROTW--Packed Rotate Words. + Rotates each word of the source operand (2nd operand) by the amount specified in the low-order byte of the corresponding word in the third operand and writes the result in the corresponding word of the destination. + + PROTW + xmm1,xmm2,xmm3/mem128 + 0F 24 41 /r /drex0 + + + PROTW + xmm1,xmm3/mem128,xmm2 + 0F 24 41 /r /drex1 + + + + PSHAB--Packed Shift Arithmetic Bytes. + Shifts each byte of second operand by an amount specified in the corresponding byte in the third operand and writes the result in the corresponding byte of the destination (xmm1 register). + + PSHAB + xmm1,xmm2,xmm3/mem128 + 0F 24 48 /r /drex0 + + + PSHAB + xmm1,xmm3/mem128,xmm2 + 0F 24 48 /r /drex1 + + + + PSHAD--Packed Shift Arithmetic Doublewords. + Shifts each doubleword of second operand by an amount specified in the low-order byte of the corresponding doubleword of third operand and writes the result in the corresponding doubleword of the destination (xmm1 register). + + PSHAD + xmm1,xmm2,xmm3/mem128 + 0F 24 4A /r /drex0 + + + PSHAD + xmm1,xmm3/mem128,xmm2 + 0F 24 4A /r /drex1 + + + + PSHAQ--Packed Shift Arithmetic Quadwords. + Shifts each quadword of second operand by an amount specified in the low-order byte of the corresponding quadword in the third operand and writes the result in the corresponding quadword of the destination (xmm1 register). + + PSHAQ + xmm1,xmm2,xmm3/mem128 + 0F 24 4B /r /drex0 + + + PSHAQ + xmm1,xmm3/mem128,xmm2 + 0F 24 4B /r /drex1 + + + + PSHAW--Packed Shift Arithmetic Words. + Shifts each word of second operand by an amount specified in the low-order byte of the corresponding word in the third operand and writes the result in the corresponding word of the destination (xmm1 register). + + PSHAW + xmm1,xmm2,xmm3/mem128 + 0F 24 49 /r /drex0 + + + PSHAW + xmm1,xmm3/mem128,xmm2 + 0F 24 49 /r /drex1 + + + + PSHLB--Packed Shift Logical Bytes. 
+ Shifts each byte of the second operand by an amount specified in the corresponding byte in the third operand and writes the result in the corresponding byte of the destination (xmm1 register). + + PSHLB + xmm1,xmm2,xmm3/mem128 + 0F 24 44 /r /drex0 + + + PSHLB + xmm1,xmm3/mem128,xmm2 + 0F 24 44 /r /drex1 + + + + PSHLD--Packed Shift Logical Doublewords. + Shifts each doubleword of second operand by an amount specified in the low-order byte of the corresponding doubleword in the third operand and writes the result in the corresponding doubleword of the destination (xmm1 register). + + PSHLD + xmm1,xmm2,xmm3/mem128 + 0F 24 46 /r /drex0 + + + PSHLD + xmm1,xmm3/mem128,xmm2 + 0F 24 46 /r /drex1 + + + + PSHLQ--Packed Shift Logical Quadwords. + Shifts each quadword of second operand by an amount specified in the low-order byte of the corresponding quadword in the third operand and writes the result in the corresponding quadword of the destination (xmm1 register). + + PSHLQ + xmm1,xmm2,xmm3/mem128 + 0F 24 47 /r /drex0 + + + PSHLQ + xmm1,xmm3/mem128,xmm2 + 0F 24 47 /r /drex1 + + + + PSHLW--Packed Shift Logical Words. + Shifts each word of the second operand by an amount specified in the low-order byte of the corresponding word in the third operand and writes the result in the corresponding word of the destination (xmm1 register). + + PSHLW + xmm1,xmm2,xmm3/mem128 + 0F 24 45 /r /drex0 + + + PSHLW + xmm1,xmm3/mem128,xmm2 + 0F 24 45 /r /drex1 + + + + PTEST--Predicate Test Register. + Set ZF, if the result of a logical AND of all bits in xmm2/m128 with the corresponding bits in xmm1 is 0s. Set CF, if the result of the logical AND of the source with a logical NOT of the destination is 0s. + + PTEST + xmm1,xmm2/mem128 + 66 0F 38 17 /r + + + + ROUNDPD--Round Packed Double-Precision Floating-Point. + Rounds two packed double-precision floating-point values in xmm2 or 128-bit memory location and writes the results in the destination (xmm1 register). + + ROUNDPD + xmm1,xmm2/mem128,imm8 + 66 0F 3A 09 /r ib + + + + ROUNDPS--Round Packed Single-Precision Floating-Point. + Rounds four packed single-precision floating-point values in xmm2 or 128-bit memory location and writes the results in the destination (xmm1 register). + + ROUNDPS + xmm1,xmm2/mem128,imm8 + 66 0F 3A 08 /r ib + + + + ROUNDSD--Round Scalar Double-Precision Floating-Point. + Rounds the scalar double-precision floating-point value in the lowest position in xmm2 or 64-bit memory location and writes the results in the lowest position in the destination (xmm1 register). + + ROUNDSD + xmm1,xmm2/mem64,imm8 + 66 0F 3A 0B /r ib + + + \ No newline at end of file diff --git a/xml/raw/x86/AMD/SSE5_Rules.dtd b/xml/raw/x86/AMD/SSE5_Rules.dtd new file mode 100644 index 0000000..c8aac69 --- /dev/null +++ b/xml/raw/x86/AMD/SSE5_Rules.dtd @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/xml/raw/x86/AMD/XOP.xml b/xml/raw/x86/AMD/XOP.xml new file mode 100644 index 0000000..f62edb3 --- /dev/null +++ b/xml/raw/x86/AMD/XOP.xml @@ -0,0 +1,990 @@ + + + + + + + + VFMADDPD--Multiply and Add Packed Double-Precision Floating-Point. 
+ + VFMADDPD + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 69 /r /is4 + + + VFMADDPD + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 69 /r /is4 + + + VFMADDPD + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 69 /r /is4 + + + VFMADDPD + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 69 /r /is4 + + + + VFMADDPS--Multiply and Add Packed Single-Precision Floating-Point. + + VFMADDPS + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 68 /r /is4 + + + VFMADDPS + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 68 /r /is4 + + + VFMADDPS + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 68 /r /is4 + + + VFMADDPS + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 68 /r /is4 + + + + VFMADDSD--Multiply and Add Scalar Double-Precision Floating-Point. + + VFMADDSD + xmm1,xmm2,xmm3/mem64,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 6B /r /is4 + + + VFMADDSD + xmm1,xmm2,xmm3,xmm4/mem64 + VEX3.mmmmm3.W1.xsrc1.L0.66 6B /r /is4 + + + + VFMADDSS--Multiply and Add Scalar Single-Precision Floating-Point. + + VFMADDSS + xmm1,xmm2,xmm3/mem32,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 6A /r /is4 + + + VFMADDSS + xmm1,xmm2,xmm3,xmm4/mem32 + VEX3.mmmmm3.W1.xsrc1.L0.66 6A /r /is4 + + + + VFMADDSUBPD--Multiply with Alternating Add/Subtract of Packed Double-Precision Floating-Point. + + VFMADDSUBPD + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 5D /r /is4 + + + VFMADDSUBPD + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 5D /r /is4 + + + VFMADDSUBPD + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 5D /r /is4 + + + VFMADDSUBPD + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 5D /r /is4 + + + + VFMADDSUBPS--Multiply with Alternating Add/Subtract of Packed Single-Precision Floating-Point. + + VFMADDSUBPS + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 5C /r /is4 + + + VFMADDSUBPS + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 5C /r /is4 + + + VFMADDSUBPS + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 5C /r /is4 + + + VFMADDSUBPS + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 5C /r /is4 + + + + VFMSUBADDPD--Multiply with Alternating Subtract/Add of Packed Double-Precision Floating-Point. + + VFMSUBADDPD + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 5F /r /is4 + + + VFMSUBADDPD + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 5F /r /is4 + + + VFMSUBADDPD + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 5F /r /is4 + + + VFMSUBADDPD + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 5F /r /is4 + + + + VFMSUBADDPS--Multiply with Alternating Subtract/Add of Packed Single-Precision Floating-Point. + + VFMSUBADDPS + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 5E /r /is4 + + + VFMSUBADDPS + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 5E /r /is4 + + + VFMSUBADDPS + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 5E /r /is4 + + + VFMSUBADDPS + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 5E /r /is4 + + + + VFMSUBPD--Multiply and Subtract Packed Double-Precision Floating-Point. + + VFMSUBPD + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 6D /r /is4 + + + VFMSUBPD + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 6D /r /is4 + + + VFMSUBPD + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 6D /r /is4 + + + VFMSUBPD + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 6D /r /is4 + + + + VFMSUBPS--Multiply and Subtract Packed Single-Precision Floating-Point. 
+ + VFMSUBPS + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 6C /r /is4 + + + VFMSUBPS + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 6C /r /is4 + + + VFMSUBPS + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 6C /r /is4 + + + VFMSUBPS + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 6C /r /is4 + + + + VFMSUBSD--Multiply and Subtract Scalar Double-Precision Floating-Point. + + VFMSUBSD + xmm1,xmm2,xmm3/mem64,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 6F /r /is4 + + + VFMSUBSD + xmm1,xmm2,xmm3,xmm4/mem64 + VEX3.mmmmm3.W1.xsrc1.L0.66 6F /r /is4 + + + + VFMSUBSS--Multiply and Subtract Scalar Single-Precision Floating-Point. + + VFMSUBSS + xmm1,xmm2,xmm3/mem32,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 6E /r /is4 + + + VFMSUBSS + xmm1,xmm2,xmm3,xmm4/mem32 + VEX3.mmmmm3.W1.xsrc1.L0.66 6E /r /is4 + + + + VFNMADDPD--Negative Multiply and Add Packed Double-Precision Floating-Point. + + VFNMADDPD + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 79 /r /is4 + + + VFNMADDPD + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 79 /r /is4 + + + VFNMADDPD + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 79 /r /is4 + + + VFNMADDPD + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 79 /r /is4 + + + + VFNMADDPS--Negative Multiply and Add Packed Single-Precision Floating-Point. + + VFNMADDPS + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 78 /r /is4 + + + VFNMADDPS + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 78 /r /is4 + + + VFNMADDPS + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 78 /r /is4 + + + VFNMADDPS + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 78 /r /is4 + + + + VFNMADDSD--Negative Multiply and Add Scalar Double-Precision Floating-Point. + + VFNMADDSD + xmm1,xmm2,xmm3/mem64,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 7B /r /is4 + + + VFNMADDSD + xmm1,xmm2,xmm3,xmm4/mem64 + VEX3.mmmmm3.W1.xsrc1.L0.66 7B /r /is4 + + + + VFNMADDSS--Negative Multiply and Add Scalar Single-Precision Floating-Point. + + VFNMADDSS + xmm1,xmm2,xmm3/mem32,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 7A /r /is4 + + + VFNMADDSS + xmm1,xmm2,xmm3,xmm4/mem32 + VEX3.mmmmm3.W1.xsrc1.L0.66 7A /r /is4 + + + + VFNMSUBPD--Negative Multiply and Subtract Packed Double-Precision Floating-Point. + + VFNMSUBPD + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 7D /r /is4 + + + VFNMSUBPD + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 7D /r /is4 + + + VFNMSUBPD + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 7D /r /is4 + + + VFNMSUBPD + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 7D /r /is4 + + + + VFNMSUBPS--Negative Multiply and Subtract Packed Single-Precision Floating-Point. + + VFNMSUBPS + xmm1,xmm2,xmm3/mem128,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 7C /r /is4 + + + VFNMSUBPS + ymm1,ymm2,ymm3/mem256,ymm4 + VEX3.mmmmm3.W0.ysrc1.L1.66 7C /r /is4 + + + VFNMSUBPS + xmm1,xmm2,xmm3,xmm4/mem128 + VEX3.mmmmm3.W1.xsrc1.L0.66 7C /r /is4 + + + VFNMSUBPS + ymm1,ymm2,ymm3,ymm4/mem256 + VEX3.mmmmm3.W1.ysrc1.L1.66 7C /r /is4 + + + + VFNMSUBSD--Negative Multiply and Subtract Scalar Double-Precision Floating-Point. + + VFNMSUBSD + xmm1,xmm2,xmm3/mem64,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 7F /r /is4 + + + VFNMSUBSD + xmm1,xmm2,xmm3,xmm4/mem64 + VEX3.mmmmm3.W1.xsrc1.L0.66 7F /r /is4 + + + + VFNMSUBSS--Negative Multiply and Subtract Scalar Single-Precision Floating-Point. 
+ + VFNMSUBSS + xmm1,xmm2,xmm3/mem32,xmm4 + VEX3.mmmmm3.W0.xsrc1.L0.66 7E /r /is4 + + + VFNMSUBSS + xmm1,xmm2,xmm3,xmm4/mem32 + VEX3.mmmmm3.W1.xsrc1.L0.66 7E /r /is4 + + + + VFRCZPD--Extract Fraction Packed Double-Precision Floating-Point. + + VFRCZPD + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA 81 /r + + + VFRCZPD + ymm1,ymm2/mem256 + XOP.mmmmm9.W0.1111.L1.NA 81 /r + + + + VFRCZPS--Extract Fraction Packed Single-Precision Floating-Point. + + VFRCZPS + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA 80 /r + + + VFRCZPS + ymm1,ymm2/mem256 + XOP.mmmmm9.W0.1111.L1.NA 80 /r + + + + VFRCZSD--Extract Fraction Scalar Double-Precision Floating-Point. + + VFRCZSD + xmm1,xmm2/mem64 + XOP.mmmmm9.W0.1111.L0.NA 83 /r + + + + VFRCZSS--Extract Fraction Scalar Single-Precision Floating Point. + + VFRCZSS + xmm1,xmm2/mem32 + XOP.mmmmm9.W0.1111.L0.NA 82 /r + + + + VPCMOV--Vector Conditional Moves. + + VPCMOV + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA A2 /r imm[7:4] + + + VPCMOV + ymm1,ymm2,ymm3/mem256,ymm4 + XOP.mmmmm8.W0.ysrc1.L1.NA A2 /r imm[7:4] + + + VPCMOV + xmm1,xmm2,xmm3,xmm4/mem128 + XOP.mmmmm8.W1.xsrc1.L0.NA A2 /r imm[7:4] + + + VPCMOV + ymm1,ymm2,ymm3,ymm4/mem256 + XOP.mmmmm8.W1.ysrc1.L1.NA A2 /r imm[7:4] + + + + VPCOMB--Compare Vector Signed Bytes. + + VPCOMB + xmm1,xmm2,xmm3/mem128,imm8 + XOP.mmmmm8.W0.xsrc1.L0.NA CC /r /imm8 + + + + VPCOMD--Compare Vector Signed Doublewords. + + VPCOMD + xmm1,xmm2,xmm3/mem128,imm8 + XOP.mmmmm8.W0.xsrc1.L0.NA CE /r /imm8 + + + + VPCOMQ--Compare Vector Signed Quadwords. + + VPCOMQ + xmm1,xmm2,xmm3/mem128,imm8 + XOP.mmmmm8.W0.xsrc1.L0.NA CF /r imm8 + + + + VPCOMUB--Compare Vector Unsigned Bytes. + + VPCOMUB + xmm1,xmm2,xmm3/mem128,imm8 + XOP.mmmmm8.W0.xsrc1.L0.NA EC /r imm8 + + + + VPCOMUD--Compare Vector Unsigned Doublewords. + + VPCOMUD + xmm1,xmm2,xmm3/mem128,imm8 + XOP.mmmmm8.W0.xsrc1.L0.NA EE /r imm8 + + + + VPCOMUQ--Compare Vector Unsigned Quadwords. + + VPCOMUQ + xmm1,xmm2,xmm3/mem128,imm8 + XOP.mmmmm8.W0.xsrc1.L0.NA EF /r imm8 + + + + VPCOMUW--Compare Vector Unsigned Words. + + VPCOMUW + xmm1,xmm2,xmm3/mem128,imm8 + XOP.mmmmm8.W0.xsrc1.L0.NA ED /r imm8 + + + + VPCOMW--Compare Vector Signed Words. + + VPCOMW + xmm1,xmm2,xmm3/mem128,imm8 + XOP.mmmmm8.W0.xsrc1.L0.NA CD /r imm8 + + + + VPERMIL2PD--Permute Two-Source Double-Precision Floating- Point Values. + + VPERMIL2PD + xmm1,xmm2,xmm3/mem128,xmm4,imm8 + VEX3.mmmmm3.W0.xsrc1.L0.NA 49 /r imm8 + + + VPERMIL2PD + xmm1,xmm2,xmm3,xmm4/mem128,imm8 + VEX3.mmmmm3.W1.xsrc1.L0.NA 49 /r imm8 + + + VPERMIL2PD + ymm1,ymm2,ymm3/mem256,ymm4,imm8 + VEX3.mmmmm3.W0.ysrc1.L1.NA 49 /r imm8 + + + VPERMIL2PD + ymm1,ymm2,ymm3,ymm4/mem256,imm8 + VEX3.mmmmm3.W1.ysrc1.L1.NA 49 /r imm8 + + + + VPERMIL2PS--Permute Two-Source Single-Precision Floating-Point Values. + + VPERMIL2PS + xmm1,xmm2,xmm3/mem128,xmm4,imm8 + VEX3.mmmmm3.W0.xsrc1.L0.NA 48 /r imm8 + + + VPERMIL2PS + xmm1,xmm2,xmm3,xmm4/mem128,imm8 + VEX3.mmmmm3.W1.xsrc1.L0.NA 48 /r imm8 + + + VPERMIL2PS + ymm1,ymm2,ymm3/mem256,ymm4,imm8 + VEX3.mmmmm3.W0.ysrc1.L1.NA 48 /r imm8 + + + VPERMIL2PS + ymm1,ymm2,ymm3,ymm4/mem256,imm8 + VEX3.mmmmm3.W1.ysrc1.L1.NA 48 /r imm8 + + + + VPHADDBD--Packed Horizontal Add Signed Byte to Signed Doubleword. + + VPHADDBD + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA C2 /r + + + + VPHADDBQ--Packed Horizontal Add Signed Byte to Signed Quadword. + + VPHADDBQ + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA C3 /r + + + + VPHADDBW--Packed Horizontal Add Signed Byte to Signed Word. 
+ + VPHADDBW + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA C1 /r + + + + VPHADDDQ--Packed Horizontal Add Signed Doubleword to Signed Quadword. + + VPHADDDQ + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA CB /r + + + + VPHADDUBD--Packed Horizontal Add Unsigned Byte to Doubleword. + + VPHADDUBD + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA D2 /r + + + + VPHADDUBQ--Packed Horizontal Add Unsigned Byte to Quadword. + + VPHADDUBQ + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA D3 /r + + + + VPHADDUBW--Packed Horizontal Add Unsigned Byte to Word. + + VPHADDUBWD + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA D1 /r + + + + VPHADDUDQ--Packed Horizontal Add Unsigned Doubleword to Quadword. + + VPHADDUDQ + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA DB /r + + + + VPHADDUWDPacked--Horizontal Add Unsigned Word to Doubleword. + + VPHADDUWD + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA D6 /r + + + + VPHADDUWQ--Packed Horizontal Add Unsigned Word to Quadword. + + VPHADDUWQ + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA D7 /r + + + + VPHADDWD--Packed Horizontal Add Signed Word to Signed Doubleword. + + VPHADDWD + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA C6 /r + + + + VPHADDWQ--Packed Horizontal Add Signed Word to Signed Quadword. + + VPHADDWQ + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA C7 /r + + + + VPHSUBBW--Packed Horizontal Subtract Signed Byte to Signed Word. + + VPHSUBBW + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA E1 /r + + + + VPHSUBDQ--Packed Horizontal Subtract Signed Doubleword to Signed Quadword. + + VPHSUBDQ + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA E3 /r + + + + VPHSUBWD--Packed Horizontal Subtract Signed Word to Signed Doubleword. + + VPHSUBWD + xmm1,xmm2/mem128 + XOP.mmmmm9.W0.1111.L0.NA E2 /r + + + + VPMACSDD--Packed Multiply Accumulate Signed Doubleword to Signed Doubleword. + + VPMACSDD + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA 9E /r /is4 + + + + VPMACSDQH--Packed Multiply Accumulate Signed High Doubleword to Signed Quadword. + + VPMACSDQH + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA 9F /r /is4 + + + + VPMACSDQL--Packed Multiply Accumulate Signed Low Doubleword to Signed Quadword. + + VPMACSDQL + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA 97 /r /is4 + + + + VPMACSSDD--Packed Multiply Accumulate Signed Doubleword to Signed Doubleword with Saturation. + + VPMACSSDD + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA 8E /r /is4 + + + + VPMACSSDQH--Packed Multiply Accumulate Signed High Doubleword to Signed Quadword with. + + VPMACSSDQH + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA 8F /r is4 + + + + VPMACSSDQL--Packed Multiply Accumulate Signed Low Doubleword to Signed Quadword with. + + PMACSSDQL + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA 87 /r /is4 + + + + VPMACSSWD--Packed Multiply Accumulate Signed Word to Signed Doubleword with Saturation. + + VPMACSSWD + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA 86 /r /is4 + + + + VPMACSSWW--Packed Multiply Accumulate Signed Word to Signed Word with Saturation. + + PMACSSWW + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA 85 /r /is4 + + + + VPMACSWD--Packed Multiply Accumulate Signed Word to Signed Doubleword. + + VPMACSWD + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA 96 /r /is4 + + + + VPMACSWW--Packed Multiply Accumulate Signed Word to Signed Word. + + VPMACSWW + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA 95 /r /is4 + + + + VPMADCSSWD--Packed Multiply, Add and Accumulate Signed Word to Signed Doubleword with Saturation. 
+ + VPMADCSSWD + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA A6 /r /is4 + + + + VPMADCSWD--Packed Multiply Add and Accumulate Signed Word to Signed Doubleword. + + PMADCSWD + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA B6 /r /is4 + + + + VPPERM--Packed Permute Bytes. + + VPPERM + xmm1,xmm2,xmm3,xmm4/mem128 + XOP.mmmmm8.W1.xsrc1.L0.NA A3 /r is4 + + + VPPERM + xmm1,xmm2,xmm3/mem128,xmm4 + XOP.mmmmm8.W0.xsrc1.L0.NA A3 /r is4 + + + + VPROTB--Packed Rotate Bytes. + + VPROTB + xmm1,xmm2/mem128,xmm8 + XOP.mmmmm9.W0.xcnt.L0.NA 90 /r + + + VPROTB + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 90 /r + + + VPROTB + xmm1,xmm2/mem128,imm8 + XOP.mmmmm8.W0.1111.L0.NA C0 /r /ib + + + + VPROTD--Packed Rotate Doublewords. + + VPROTD + xmm1,xmm2/mem128,xmm3 + XOP.mmmmm9.W0.xcnt.L0.NA 92 /r + + + VPROTD + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 92 /r + + + VPROTD + xmm1,xmm2/mem128,imm8 + XOP.mmmmm8.W0.1111.L0.NA C2 /ib + + + + VPROTQ--Packed Rotate Quadwords. + + VPROTQ + xmm1,xmm2/mem128,xmm3 + XOP.mmmmm9.W0.xcnt.L0.NA 93 /r + + + VPROTQ + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 93 /r + + + VPROTQ + xmm1,xmm2/mem128,imm8 + XOP.mmmmm8.W0.1111.L0.NA C3 /ib + + + + VPROTW--Packed Rotate Words. + + VPROTW + xmm1,xmm2/mem128,xmm3 + XOP.mmmmm9.W0.xcnt.L0.NA 91 /r + + + VPROTW + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 91 /r + + + VPROTW + xmm1,xmm2/mem128,imm8 + XOP.mmmmm8.W0.1111.L0.NA C1 /r /ib + + + + VPSHAB--Packed Shift Arithmetic Bytes. + + VPSHAB + xmm1,xmm2/mem128,xmm3 + XOP.mmmmm9.W0.xcnt.L0.NA 98 /r + + + VPSHAB + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 98 /r + + + + VPSHAD--Packed Shift Arithmetic Doublewords. + + VPSHAD + xmm1,xmm2/mem128,xmm3 + XOP.mmmmm9.W0.xcnt.L0.NA 9A /r + + + VPSHAD + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 9A /r + + + + VPSHAQ--Packed Shift Arithmetic Quadwords. + + VPSHAQ + xmm1,xmm2/mem128,xmm3 + XOP.mmmmm9.W0.xcnt.L0.NA 9B /r + + + VPSHAQ + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 9B /r + + + + VPSHAW--Packed Shift Arithmetic Words. + + VPSHAW + xmm1,xmm2/mem128,xmm3 + XOP.mmmmm9.W0.xcnt.L0.NA 99 /r + + + VPSHAW + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 99 /r + + + + VPSHLB--Packed Shift Logical Bytes. + + VPSHLB + xmm1,xmm2/mem128,xmm3 + XOP.mmmmm9.W0.xcnt.L0.NA 94 /r + + + VPSHLB + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 94 /r + + + + VPSHLD--Packed Shift Logical Doublewords. + + VPSHLD + xmm1,xmm3/mem128,xmm2 + XOP.mmmmm9.W0.xcnt.L0.NA 96 /r + + + VPSHLD + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 96 /r + + + + VPSHLQ--Packed Shift Logical Quadwords. + + VPSHLQ + xmm1,xmm3/mem128,xmm2 + XOP.mmmmm9.W0.xcnt.L0.NA 97 /r + + + VPSHLQ + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 97 /r + + + + VPSHLW--Packed Shift Logical Words. 
+ + VPSHLW + xmm1,xmm3/mem128,xmm2 + XOP.mmmmm9.W0.xcnt.L0.NA 95 /r + + + VPSHLW + xmm1,xmm2,xmm3/mem128 + XOP.mmmmm9.W1.xsrc.L0.NA 95 /r + + + \ No newline at end of file diff --git a/xml/raw/x86/AMD/XOP_Rules.dtd b/xml/raw/x86/AMD/XOP_Rules.dtd new file mode 100644 index 0000000..d835480 --- /dev/null +++ b/xml/raw/x86/AMD/XOP_Rules.dtd @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/xml/raw/x86/Intel/AVX512_Rules.dtd b/xml/raw/x86/Intel/AVX512_Rules.dtd new file mode 100644 index 0000000..4b1e190 --- /dev/null +++ b/xml/raw/x86/Intel/AVX512_Rules.dtd @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/xml/raw/x86/Intel/AVX512_r22.xml b/xml/raw/x86/Intel/AVX512_r22.xml new file mode 100644 index 0000000..1279076 --- /dev/null +++ b/xml/raw/x86/Intel/AVX512_r22.xml @@ -0,0 +1,25901 @@ + + + + + + + + + ADDPD--Add Packed Double-Precision Floating-Point Values. + + ADDPD + xmm1,xmm2/m128 + 66 0F 58 /r + + SSE2 + + Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in xmm1. + + + VADDPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 58 /r + + AVX + + Add packed double-precision floating-point values from xmm3/mem to xmm2 and store result in xmm1. + + + VADDPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 58 /r + + AVX + + Add packed double-precision floating-point values from ymm3/mem to ymm2 and store result in ymm1. + + + VADDPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 58 /r + + AVX512VL + AVX512F + + Add packed double-precision floating-point values from xmm3/m128/m64bcst to xmm2 and store result in xmm1 with writemask k1. + + + VADDPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 58 /r + + AVX512VL + AVX512F + + Add packed double-precision floating-point values from ymm3/m256/m64bcst to ymm2 and store result in ymm1 with writemask k1. + + + VADDPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F.W1 58 /r + + AVX512F + + Add packed double-precision floating-point values from zmm3/m512/m64bcst to zmm2 and store result in zmm1 with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ADDPS--Add Packed Single-Precision Floating-Point Values. + + ADDPS + xmm1,xmm2/m128 + 0F 58 /r + + SSE + + Add packed single-precision floating-point values from xmm2/m128 to xmm1 and store result in xmm1. + + + VADDPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 58 /r + + AVX + + Add packed single-precision floating-point values from xmm3/m128 to xmm2 and store result in xmm1. + + + VADDPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 58 /r + + AVX + + Add packed single-precision floating-point values from ymm3/m256 to ymm2 and store result in ymm1. + + + VADDPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 58 /r + + AVX512VL + AVX512F + + Add packed single-precision floating-point values from xmm3/m128/m32bcst to xmm2 and store result in xmm1 with writemask k1. + + + VADDPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 58 /r + + AVX512VL + AVX512F + + Add packed single-precision floating-point values from ymm3/m256/m32bcst to ymm2 and store result in ymm1 with writemask k1. 
+ + + VADDPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst {er} + EVEX.NDS.512.0F.W0 58 /r + + AVX512F + + Add packed single-precision floating-point values from zmm3/m512/m32bcst to zmm2 and store result in zmm1 with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ADDSD--Add Scalar Double-Precision Floating-Point Values. + + ADDSD + xmm1,xmm2/m64 + F2 0F 58 /r + + SSE2 + + Add the low double-precision floating-point value from xmm2/mem to xmm1 and store the result in xmm1. + + + VADDSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 58 /r + + AVX + + Add the low double-precision floating-point value from xmm3/mem to xmm2 and store the result in xmm1. + + + VADDSD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.F2.0F.W1 58 /r + + AVX512F + + Add the low double-precision floating-point value from xmm3/m64 to xmm2 and store the result in xmm1 with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ADDSS--Add Scalar Single-Precision Floating-Point Values. + + ADDSS + xmm1,xmm2/m32 + F3 0F 58 /r + + SSE + + Add the low single-precision floating-point value from xmm2/mem to xmm1 and store the result in xmm1. + + + VADDSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 58 /r + + AVX + + Add the low single-precision floating-point value from xmm3/mem to xmm2 and store the result in xmm1. + + + VADDSS + xmm1{k1}{z},xmm2,xmm3/m32{er} + EVEX.NDS.LIG.F3.0F.W0 58 /r + + AVX512F + + Add the low single-precision floating-point value from xmm3/m32 to xmm2 and store the result in xmm1with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VALIGND/VALIGNQ--Align Doubleword/Quadword Vectors. + + VALIGND + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.66.0F3A.W0 03 /r ib + + AVX512VL + AVX512F + + Shift right and merge vectors xmm2 and xmm3/m128/m32bcst with double-word granularity using imm8 as number of elements to shift, and store the final result in xmm1, under writemask. + + + VALIGNQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F3A.W1 03 /r ib + + AVX512VL + AVX512F + + Shift right and merge vectors xmm2 and xmm3/m128/m64bcst with quad-word granularity using imm8 as number of elements to shift, and store the final result in xmm1, under writemask. + + + VALIGND + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 03 /r ib + + AVX512VL + AVX512F + + Shift right and merge vectors ymm2 and ymm3/m256/m32bcst with double-word granularity using imm8 as number of elements to shift, and store the final result in ymm1, under writemask. + + + VALIGNQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 03 /r ib + + AVX512VL + AVX512F + + Shift right and merge vectors ymm2 and ymm3/m256/m64bcst with quad-word granularity using imm8 as number of elements to shift, and store the final result in ymm1, under writemask. + + + VALIGND + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.66.0F3A.W0 03 /r ib + + AVX512F + + Shift right and merge vectors zmm2 and zmm3/m512/m32bcst with double-word granularity using imm8 as number of elements to shift, and store the final result in zmm1, under writemask. 
+ + + VALIGNQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F3A.W1 03 /r ib + + AVX512F + + Shift right and merge vectors zmm2 and zmm3/m512/m64bcst with quad-word granularity using imm8 as number of elements to shift, and store the final result in zmm1, under writemask. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VBLENDMPD/VBLENDMPS--Blend Float64/Float32 Vectors Using an OpMask Control. + + VBLENDMPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 65 /r + + AVX512VL + AVX512F + + Blend double-precision vector xmm2 and double-precision vector xmm3/m128/m64bcst and store the result in xmm1, under control mask. + + + VBLENDMPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 65 /r + + AVX512VL + AVX512F + + Blend double-precision vector ymm2 and double-precision vector ymm3/m256/m64bcst and store the result in ymm1, under control mask. + + + VBLENDMPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 65 /r + + AVX512F + + Blend double-precision vector zmm2 and double-precision vector zmm3/m512/m64bcst and store the result in zmm1, under control mask. + + + VBLENDMPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 65 /r + + AVX512VL + AVX512F + + Blend single-precision vector xmm2 and single-precision vector xmm3/m128/m32bcst and store the result in xmm1, under control mask. + + + VBLENDMPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 65 /r + + AVX512VL + AVX512F + + Blend single-precision vector ymm2 and single-precision vector ymm3/m256/m32bcst and store the result in ymm1, under control mask. + + + VBLENDMPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 65 /r + + AVX512F + + Blend single-precision vector zmm2 and single-precision vector zmm3/m512/m32bcst using k1 as select control and store the result in zmm1. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VPBLENDMB/VPBLENDMW--Blend Byte/Word Vectors Using an Opmask Control. + + VPBLENDMB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W0 66 /r + + AVX512VL + AVX512BW + + Blend byte integer vector xmm2 and byte vector xmm3/m128 and store the result in xmm1, under control mask. + + + VPBLENDMB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W0 66 /r + + AVX512VL + AVX512BW + + Blend byte integer vector ymm2 and byte vector ymm3/m256 and store the result in ymm1, under control mask. + + + VPBLENDMB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W0 66 /r + + AVX512BW + + Blend byte integer vector zmm2 and byte vector zmm3/m512 and store the result in zmm1, under control mask. + + + VPBLENDMW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 66 /r + + AVX512VL + AVX512BW + + Blend word integer vector xmm2 and word vector xmm3/m128 and store the result in xmm1, under control mask. + + + VPBLENDMW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 66 /r + + AVX512VL + AVX512BW + + Blend word integer vector ymm2 and word vector ymm3/m256 and store the result in ymm1, under control mask. + + + VPBLENDMW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 66 /r + + AVX512BW + + Blend word integer vector zmm2 and word vector zmm3/m512 and store the result in zmm1, under control mask. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VPBLENDMD/VPBLENDMQ--Blend Int32/Int64 Vectors Using an OpMask Control. 
+ + VPBLENDMD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 64 /r + + AVX512VL + AVX512F + + Blend doubleword integer vector xmm2 and doubleword vector xmm3/m128/m32bcst and store the result in xmm1, under control mask. + + + VPBLENDMD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 64 /r + + AVX512VL + AVX512F + + Blend doubleword integer vector ymm2 and doubleword vector ymm3/m256/m32bcst and store the result in ymm1, under control mask. + + + VPBLENDMD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 64 /r + + AVX512F + + Blend doubleword integer vector zmm2 and doubleword vector zmm3/m512/m32bcst and store the result in zmm1, under control mask. + + + VPBLENDMQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 64 /r + + AVX512VL + AVX512F + + Blend quadword integer vector xmm2 and quadword vector xmm3/m128/m64bcst and store the result in xmm1, under control mask. + + + VPBLENDMQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 64 /r + + AVX512VL + AVX512F + + Blend quadword integer vector ymm2 and quadword vector ymm3/m256/m64bcst and store the result in ymm1, under control mask. + + + VPBLENDMQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 64 /r + + AVX512F + + Blend quadword integer vector zmm2 and quadword vector zmm3/m512/m64bcst and store the result in zmm1, under control mask. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ANDPD--Bitwise Logical AND of Packed Double Precision Floating-Point Values. + + ANDPD + xmm1,xmm2/m128 + 66 0F 54 /r + + SSE2 + + Return the bitwise logical AND of packed double-precision floating-point values in xmm1 and xmm2/mem. + + + VANDPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 54 /r + + AVX + + Return the bitwise logical AND of packed double-precision floating-point values in xmm2 and xmm3/mem. + + + VANDPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 54 /r + + AVX + + Return the bitwise logical AND of packed double-precision floating-point values in ymm2 and ymm3/mem. + + + VANDPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 54 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed double-precision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1. + + + VANDPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 54 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed double-precision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1. + + + VANDPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 54 /r + + AVX512DQ + + Return the bitwise logical AND of packed double-precision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ANDPS--Bitwise Logical AND of Packed Single Precision Floating-Point Values. + + ANDPS + xmm1,xmm2/m128 + 0F 54 /r + + SSE + + Return the bitwise logical AND of packed single-precision floating-point values in xmm1 and xmm2/mem. + + + VANDPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F 54 /r + + AVX + + Return the bitwise logical AND of packed single-precision floating-point values in xmm2 and xmm3/mem. + + + VANDPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F 54 /r + + AVX + + Return the bitwise logical AND of packed single-precision floating-point values in ymm2 and ymm3/mem. 
+ + + VANDPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 54 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1. + + + VANDPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 54 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1. + + + VANDPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 54 /r + + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ANDNPD--Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values. + + ANDNPD + xmm1,xmm2/m128 + 66 0F 55 /r + + SSE2 + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in xmm1 and xmm2/mem. + + + VANDNPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 55 /r + + AVX + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in xmm2 and xmm3/mem. + + + VANDNPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 55/r + + AVX + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in ymm2 and ymm3/mem. + + + VANDNPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 55 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1. + + + VANDNPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 55 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1. + + + VANDNPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 55 /r + + AVX512DQ + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ANDNPS--Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values. + + ANDNPS + xmm1,xmm2/m128 + 0F 55 /r + + SSE + + Return the bitwise logical AND NOT of packed single-precision floating-point values in xmm1 and xmm2/mem. + + + VANDNPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F 55 /r + + AVX + + Return the bitwise logical AND NOT of packed single-precision floating-point values in xmm2 and xmm3/mem. + + + VANDNPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F 55 /r + + AVX + + Return the bitwise logical AND NOT of packed single-precision floating-point values in ymm2 and ymm3/mem. + + + VANDPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 54 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1. + + + VANDPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 54 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1. 
+ + + VANDPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 54 /r + + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VBROADCAST--Load with Broadcast Floating-Point Data. + + VBROADCASTSS + xmm1,m32 + VEX.128.66.0F38.W0 18 /r + + AVX + + Broadcast single-precision floating-point element in mem to four locations in xmm1. + + + VBROADCASTSS + ymm1,m32 + VEX.256.66.0F38.W0 18 /r + + AVX + + Broadcast single-precision floating-point element in mem to eight locations in ymm1. + + + VBROADCASTSD + ymm1,m64 + VEX.256.66.0F38.W0 19 /r + + AVX + + Broadcast double-precision floating-point element in mem to four locations in ymm1. + + + VBROADCASTF128 + ymm1,m128 + VEX.256.66.0F38.W0 1A /r + + AVX + + Broadcast 128 bits of floating-point data in mem to low and high 128-bits in ymm1. + + + VBROADCASTSD + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.W1 19 /r + + AVX512VL + AVX512F + + Broadcast low double-precision floating-point element in xmm2/m64 to four locations in ymm1 using writemask k1. + + + VBROADCASTSD + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.W1 19 /r + + AVX512F + + Broadcast low double-precision floating-point element in xmm2/m64 to eight locations in zmm1 using writemask k1. + + + VBROADCASTF32X2 + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.W0 19 /r + + AVX512VL + AVX512DQ + + Broadcast two single-precision floating-point elements in xmm2/m64 to locations in ymm1 using writemask k1. + + + VBROADCASTF32X2 + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.W0 19 /r + + AVX512DQ + + Broadcast two single-precision floating-point elements in xmm2/m64 to locations in zmm1 using writemask k1. + + + VBROADCASTSS + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.W0 18 /r + + AVX512VL + AVX512F + + Broadcast low single-precision floating-point element in xmm2/m32 to all locations in xmm1 using writemask k1. + + + VBROADCASTSS + ymm1 {k1}{z},xmm2/m32 + EVEX.256.66.0F38.W0 18 /r + + AVX512VL + AVX512F + + Broadcast low single-precision floating-point element in xmm2/m32 to all locations in ymm1 using writemask k1. + + + VBROADCASTSS + zmm1 {k1}{z},xmm2/m32 + EVEX.512.66.0F38.W0 18 /r + + AVX512F + + Broadcast low single-precision floating-point element in xmm2/m32 to all locations in zmm1 using writemask k1. + + + VBROADCASTF32X4 + ymm1 {k1}{z},m128 + EVEX.256.66.0F38.W0 1A /r + + AVX512VL + AVX512F + + Broadcast 128 bits of 4 single-precision floating-point data in mem to locations in ymm1 using writemask k1. + + + VBROADCASTF32X4 + zmm1 {k1}{z},m128 + EVEX.512.66.0F38.W0 1A /r + + AVX512F + + Broadcast 128 bits of 4 single-precision floating-point data in mem to locations in zmm1 using writemask k1. + + + VBROADCASTF64X2 + ymm1 {k1}{z},m128 + EVEX.256.66.0F38.W1 1A /r + + AVX512VL + AVX512DQ + + Broadcast 128 bits of 2 double-precision floating-point data in mem to locations in ymm1 using writemask k1. + + + VBROADCASTF64X2 + zmm1 {k1}{z},m128 + EVEX.512.66.0F38.W1 1A /r + + AVX512DQ + + Broadcast 128 bits of 2 double-precision floating-point data in mem to locations in zmm1 using writemask k1. + + + VBROADCASTF32X8 + zmm1 {k1}{z},m256 + EVEX.512.66.0F38.W0 1B /r + + AVX512DQ + + Broadcast 256 bits of 8 single-precision floating-point data in mem to locations in zmm1 using writemask k1. 
+ + + VBROADCASTF64X4 + zmm1 {k1}{z},m256 + EVEX.512.66.0F38.W1 1B /r + + AVX512F + + Broadcast 256 bits of 4 double-precision floating-point data in mem to locations in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + + VPBROADCASTB/W/D/Q--Load with Broadcast Integer Data from General Purpose Register. + + VPBROADCASTB + xmm1 {k1}{z},reg + EVEX.128.66.0F38.W0 7A /r + + AVX512VL + AVX512BW + + Broadcast an 8-bit value from a GPR to all bytes in the 128-bit destination subject to writemask k1. + + + VPBROADCASTB + ymm1 {k1}{z},reg + EVEX.256.66.0F38.W0 7A /r + + AVX512VL + AVX512BW + + Broadcast an 8-bit value from a GPR to all bytes in the 256-bit destination subject to writemask k1. + + + VPBROADCASTB + zmm1 {k1}{z},reg + EVEX.512.66.0F38.W0 7A /r + + AVX512BW + + Broadcast an 8-bit value from a GPR to all bytes in the 512-bit destination subject to writemask k1. + + + VPBROADCASTW + xmm1 {k1}{z},reg + EVEX.128.66.0F38.W0 7B /r + + AVX512VL + AVX512BW + + Broadcast a 16-bit value from a GPR to all words in the 128-bit destination subject to writemask k1. + + + VPBROADCASTW + ymm1 {k1}{z},reg + EVEX.256.66.0F38.W0 7B /r + + AVX512VL + AVX512BW + + Broadcast a 16-bit value from a GPR to all words in the 256-bit destination subject to writemask k1. + + + VPBROADCASTW + zmm1 {k1}{z},reg + EVEX.512.66.0F38.W0 7B /r + + AVX512BW + + Broadcast a 16-bit value from a GPR to all words in the 512-bit destination subject to writemask k1. + + + VPBROADCASTD + xmm1 {k1}{z},r32 + EVEX.128.66.0F38.W0 7C /r + + AVX512VL + AVX512F + + Broadcast a 32-bit value from a GPR to all double-words in the 128-bit destination subject to writemask k1. + + + VPBROADCASTD + ymm1 {k1}{z},r32 + EVEX.256.66.0F38.W0 7C /r + + AVX512VL + AVX512F + + Broadcast a 32-bit value from a GPR to all double-words in the 256-bit destination subject to writemask k1. + + + VPBROADCASTD + zmm1 {k1}{z},r32 + EVEX.512.66.0F38.W0 7C /r + + AVX512F + + Broadcast a 32-bit value from a GPR to all double-words in the 512-bit destination subject to writemask k1. + + + VPBROADCASTQ + xmm1 {k1}{z},r64 + EVEX.128.66.0F38.W1 7C /r + + AVX512VL + AVX512F + + Broadcast a 64-bit value from a GPR to all quad-words in the 128-bit destination subject to writemask k1. + + + VPBROADCASTQ + ymm1 {k1}{z},r64 + EVEX.256.66.0F38.W1 7C /r + + AVX512VL + AVX512F + + Broadcast a 64-bit value from a GPR to all quad-words in the 256-bit destination subject to writemask k1. + + + VPBROADCASTQ + zmm1 {k1}{z},r64 + EVEX.512.66.0F38.W1 7C /r + + AVX512F + + Broadcast a 64-bit value from a GPR to all quad-words in the 512-bit destination subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPBROADCAST--Load Integer and Broadcast. + + VPBROADCASTB + xmm1,xmm2/m8 + VEX.128.66.0F38.W0 78 /r + + AVX2 + + Broadcast a byte integer in the source operand to sixteen locations in xmm1. + + + VPBROADCASTB + ymm1,xmm2/m8 + VEX.256.66.0F38.W0 78 /r + + AVX2 + + Broadcast a byte integer in the source operand to thirty-two locations in ymm1. + + + VPBROADCASTB + xmm1{k1}{z},xmm2/m8 + EVEX.128.66.0F38.W0 78 /r + + AVX512VL + AVX512BW + + Broadcast a byte integer in the source operand to locations in xmm1 subject to writemask k1. 
+ + + VPBROADCASTB + ymm1{k1}{z},xmm2/m8 + EVEX.256.66.0F38.W0 78 /r + + AVX512VL + AVX512BW + + Broadcast a byte integer in the source operand to locations in ymm1 subject to writemask k1. + + + VPBROADCASTB + zmm1{k1}{z},xmm2/m8 + EVEX.512.66.0F38.W0 78 /r + + AVX512BW + + Broadcast a byte integer in the source operand to 64 locations in zmm1 subject to writemask k1. + + + VPBROADCASTW + xmm1,xmm2/m16 + VEX.128.66.0F38.W0 79 /r + + AVX2 + + Broadcast a word integer in the source operand to eight locations in xmm1. + + + VPBROADCASTW + ymm1,xmm2/m16 + VEX.256.66.0F38.W0 79 /r + + AVX2 + + Broadcast a word integer in the source operand to sixteen locations in ymm1. + + + VPBROADCASTW + xmm1{k1}{z},xmm2/m16 + EVEX.128.66.0F38.W0 79 /r + + AVX512VL + AVX512BW + + Broadcast a word integer in the source operand to locations in xmm1 subject to writemask k1. + + + VPBROADCASTW + ymm1{k1}{z},xmm2/m16 + EVEX.256.66.0F38.W0 79 /r + + AVX512VL + AVX512BW + + Broadcast a word integer in the source operand to locations in ymm1 subject to writemask k1. + + + VPBROADCASTW + zmm1{k1}{z},xmm2/m16 + EVEX.512.66.0F38.W0 79 /r + + AVX512BW + + Broadcast a word integer in the source operand to 32 locations in zmm1 subject to writemask k1. + + + VPBROADCASTD + xmm1,xmm2/m32 + VEX.128.66.0F38.W0 58 /r + + AVX2 + + Broadcast a dword integer in the source operand to four locations in xmm1. + + + VPBROADCASTD + ymm1,xmm2/m32 + VEX.256.66.0F38.W0 58 /r + + AVX2 + + Broadcast a dword integer in the source operand to eight locations in ymm1. + + + VPBROADCASTD + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.W0 58 /r + + AVX512VL + AVX512F + + Broadcast a dword integer in the source operand to locations in xmm1 subject to writemask k1. + + + VPBROADCASTD + ymm1 {k1}{z},xmm2/m32 + EVEX.256.66.0F38.W0 58 /r + + AVX512VL + AVX512F + + Broadcast a dword integer in the source operand to locations in ymm1 subject to writemask k1. + + + VPBROADCASTD + zmm1 {k1}{z},xmm2/m32 + EVEX.512.66.0F38.W0 58 /r + + AVX512F + + Broadcast a dword integer in the source operand to locations in zmm1 subject to writemask k1. + + + VPBROADCASTQ + xmm1,xmm2/m64 + VEX.128.66.0F38.W0 59 /r + + AVX2 + + Broadcast a qword element in source operand to two locations in xmm1. + + + VPBROADCASTQ + ymm1,xmm2/m64 + VEX.256.66.0F38.W0 59 /r + + AVX2 + + Broadcast a qword element in source operand to four locations in ymm1. + + + VPBROADCASTQ + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.W1 59 /r + + AVX512VL + AVX512F + + Broadcast a qword element in source operand to locations in xmm1 subject to writemask k1. + + + VPBROADCASTQ + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.W1 59 /r + + AVX512VL + AVX512F + + Broadcast a qword element in source operand to locations in ymm1 subject to writemask k1. + + + VPBROADCASTQ + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.W1 59 /r + + AVX512F + + Broadcast a qword element in source operand to locations in zmm1 subject to writemask k1. + + + VBROADCASTI32x2 + xmm1 {k 1}{z},xmm2/m64 + EVEX.128.66.0F38.W0 59 /r + + AVX512VL + AVX512DQ + + Broadcast two dword elements in source operand to locations in xmm1 subject to writemask k1. + + + VBROADCASTI32x2 + ymm1 {k 1}{z},xmm2/m64 + EVEX.256.66.0F38.W0 59 /r + + AVX512VL + AVX512DQ + + Broadcast two dword elements in source operand to locations in ymm1 subject to writemask k1. + + + VBROADCASTI32x2 + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.W0 59 /r + + AVX512DQ + + Broadcast two dword elements in source operand to locations in zmm1 subject to writemask k1. 
+ + + VBROADCASTI128 + ymm1,m128 + VEX.256.66.0F38.W0 5A /r + + AVX2 + + Broadcast 128 bits of integer data in mem to low and high 128-bits in ymm1. + + + VBROADCASTI32X4 + ymm1 {k1}{z},m128 + EVEX.256.66.0F38.W0 5A /r + + AVX512VL + AVX512F + + Broadcast 128 bits of 4 doubleword integer data in mem to locations in ymm1 using writemask k1. + + + VBROADCASTI32X4 + zmm1 {k1}{z},m128 + EVEX.512.66.0F38.W0 5A /r + + AVX512F + + Broadcast 128 bits of 4 doubleword integer data in mem to locations in zmm1 using writemask k1. + + + VBROADCASTI64X2 + ymm1 {k1}{z},m128 + EVEX.256.66.0F38.W1 5A /r + + AVX512VL + AVX512DQ + + Broadcast 128 bits of 2 quadword integer data in mem to locations in ymm1 using writemask k1. + + + VBROADCASTI64X2 + zmm1 {k1}{z},m128 + EVEX.512.66.0F38.W1 5A /r + + AVX512DQ + + Broadcast 128 bits of 2 quadword integer data in mem to locations in zmm1 using writemask k1. + + + VBROADCASTI32X8 + zmm1 {k1}{z},m256 + EVEX.512.66.0F38.W1 5B /r + + AVX512DQ + + Broadcast 256 bits of 8 doubleword integer data in mem to locations in zmm1 using writemask k1. + + + VBROADCASTI64X4 + zmm1 {k1}{z},m256 + EVEX.512.66.0F38.W1 5B /r + + AVX512F + + Broadcast 256 bits of 4 quadword integer data in mem to locations in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CMPPD--Compare Packed Double-Precision Floating-Point Values. + + CMPPD + xmm1,xmm2/m128,imm8 + 66 0F C2 /r ib + + SSE2 + + Compare packed double-precision floating-point values in xmm2/m128 and xmm1 using bits 2:0 of imm8 as a comparison predicate. + + + VCMPPD + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F.WIG C2 /r ib + + AVX + + Compare packed double-precision floating-point values in xmm3/m128 and xmm2 using bits 4:0 of imm8 as a comparison predicate. + + + VCMPPD + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F.WIG C2 /r ib + + AVX + + Compare packed double-precision floating-point values in ymm3/m256 and ymm2 using bits 4:0 of imm8 as a comparison predicate. + + + VCMPPD + k1 {k2},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F.W1 C2 /r ib + + AVX512VL + AVX512F + + Compare packed double-precision floating-point values in xmm3/m128/m64bcst and xmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VCMPPD + k1 {k2},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F.W1 C2 /r ib + + AVX512VL + AVX512F + + Compare packed double-precision floating-point values in ymm3/m256/m64bcst and ymm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VCMPPD + k1 {k2},zmm2,zmm3/m512/m64bcst{sae},imm8 + EVEX.NDS.512.66.0F.W1 C2 /r ib + + AVX512F + + Compare packed double-precision floating-point values in zmm3/m512/m64bcst and zmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + CMPPS--Compare Packed Single-Precision Floating-Point Values. + + CMPPS + xmm1,xmm2/m128,imm8 + 0F C2 /r ib + + SSE + + Compare packed single-precision floating-point values in xmm2/m128 and xmm1 using bits 2:0 of imm8 as a comparison predicate. 
+ + + VCMPPS + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.0F.WIG C2 /r ib + + AVX + + Compare packed single-precision floating-point values in xmm3/m128 and xmm2 using bits 4:0 of imm8 as a comparison predicate. + + + VCMPPS + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.0F.WIG C2 /r ib + + AVX + + Compare packed single-precision floating-point values in ymm3/m256 and ymm2 using bits 4:0 of imm8 as a comparison predicate. + + + VCMPPS + k1 {k2},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.0F.W0 C2 /r ib + + AVX512VL + AVX512F + + Compare packed single-precision floating-point values in xmm3/m128/m32bcst and xmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VCMPPS + k1 {k2},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.0F.W0 C2 /r ib + + AVX512VL + AVX512F + + Compare packed single-precision floating-point values in ymm3/m256/m32bcst and ymm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VCMPPS + k1 {k2},zmm2,zmm3/m512/m32bcst{sae},imm8 + EVEX.NDS.512.0F.W0 C2 /r ib + + AVX512F + + Compare packed single-precision floating-point values in zmm3/m512/m32bcst and zmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + CMPSD--Compare Scalar Double-Precision Floating-Point Value. + + CMPSD + xmm1,xmm2/m64,imm8 + F2 0F C2 /r ib + + SSE2 + + Compare low double-precision floating-point value in xmm2/m64 and xmm1 using bits 2:0 of imm8 as comparison predicate. + + + VCMPSD + xmm1,xmm2,xmm3/m64,imm8 + VEX.NDS.128.F2.0F.WIG C2 /r ib + + AVX + + Compare low double-precision floating-point value in xmm3/m64 and xmm2 using bits 4:0 of imm8 as comparison predicate. + + + VCMPSD + k1 {k2},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.F2.0F.W1 C2 /r ib + + AVX512F + + Compare low double-precision floating-point value in xmm3/m64 and xmm2 using bits 4:0 of imm8 as comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + CMPSS--Compare Scalar Single-Precision Floating-Point Value. + + CMPSS + xmm1,xmm2/m32,imm8 + F3 0F C2 /r ib + + SSE + + Compare low single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of imm8 as comparison predicate. + + + VCMPSS + xmm1,xmm2,xmm3/m32,imm8 + VEX.NDS.128.F3.0F.WIG C2 /r ib + + AVX + + Compare low single-precision floating-point value in xmm3/m32 and xmm2 using bits 4:0 of imm8 as comparison predicate. + + + VCMPSS + k1 {k2},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.F3.0F.W0 C2 /r ib + + AVX512F + + Compare low single-precision floating-point value in xmm3/m32 and xmm2 using bits 4:0 of imm8 as comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + COMISD--Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS. + + COMISD + xmm1,xmm2/m64 + 66 0F 2F /r + + SSE2 + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. 
+ + + VCOMISD + xmm1,xmm2/m64 + VEX.128.66.0F.WIG 2F /r + + AVX + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + VCOMISD + xmm1,xmm2/m64{sae} + EVEX.LIG.66.0F.W1 2F /r + + AVX512F + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + COMISS--Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS. + + COMISS + xmm1,xmm2/m32 + 0F 2F /r + + SSE + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + VCOMISS + xmm1,xmm2/m32 + VEX.128.0F.WIG 2F /r + + AVX + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + VCOMISS + xmm1,xmm2/m32{sae} + EVEX.LIG.0F.W0 2F /r + + AVX512F + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + DIVPD--Divide Packed Double-Precision Floating-Point Values. + + DIVPD + xmm1,xmm2/m128 + 66 0F 5E /r + + SSE2 + + Divide packed double-precision floating-point values in xmm1 by packed double-precision floating-point values in xmm2/mem. + + + VDIVPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5E /r + + AVX + + Divide packed double-precision floating-point values in xmm2 by packed double-precision floating-point values in xmm3/mem. + + + VDIVPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5E /r + + AVX + + Divide packed double-precision floating-point values in ymm2 by packed double-precision floating-point values in ymm3/mem. + + + VDIVPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 5E /r + + AVX512VL + AVX512F + + Divide packed double-precision floating-point values in xmm2 by packed double-precision floating-point values in xmm3/m128/m64bcst and write results to xmm1 subject to writemask k1. + + + VDIVPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 5E /r + + AVX512VL + AVX512F + + Divide packed double-precision floating-point values in ymm2 by packed double-precision floating-point values in ymm3/m256/m64bcst and write results to ymm1 subject to writemask k1. + + + VDIVPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F.W1 5E /r + + AVX512F + + Divide packed double-precision floating-point values in zmm2 by packed double-precision FP values in zmm3/m512/m64bcst and write results to zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + DIVPS--Divide Packed Single-Precision Floating-Point Values. + + DIVPS + xmm1,xmm2/m128 + 0F 5E /r + + SSE + + Divide packed single-precision floating-point values in xmm1 by packed single-precision floating-point values in xmm2/mem. + + + VDIVPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5E /r + + AVX + + Divide packed single-precision floating-point values in xmm2 by packed single-precision floating-point values in xmm3/mem. + + + VDIVPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5E /r + + AVX + + Divide packed single-precision floating-point values in ymm2 by packed single-precision floating-point values in ymm3/mem. 
+ + + VDIVPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 5E /r + + AVX512VL + AVX512F + + Divide packed single-precision floating-point values in xmm2 by packed single-precision floating-point values in xmm3/m128/m32bcst and write results to xmm1 subject to writemask k1. + + + VDIVPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 5E /r + + AVX512VL + AVX512F + + Divide packed single-precision floating-point values in ymm2 by packed single-precision floating-point values in ymm3/m256/m32bcst and write results to ymm1 subject to writemask k1. + + + VDIVPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.0F.W0 5E /r + + AVX512F + + Divide packed single-precision floating-point values in zmm2 by packed single-precision floating-point values in zmm3/m512/m32bcst and write results to zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + DIVSD--Divide Scalar Double-Precision Floating-Point Value. + + DIVSD + xmm1,xmm2/m64 + F2 0F 5E /r + + SSE2 + + Divide low double-precision floating-point value in xmm1 by low double-precision floating-point value in xmm2/m64. + + + VDIVSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 5E /r + + AVX + + Divide low double-precision floating-point value in xmm2 by low double-precision floating-point value in xmm3/m64. + + + VDIVSD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.F2.0F.W1 5E /r + + AVX512F + + Divide low double-precision floating-point value in xmm2 by low double-precision floating-point value in xmm3/m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + DIVSS--Divide Scalar Single-Precision Floating-Point Values. + + DIVSS + xmm1,xmm2/m32 + F3 0F 5E /r + + SSE + + Divide low single-precision floating-point value in xmm1 by low single-precision floating-point value in xmm2/m32. + + + VDIVSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 5E /r + + AVX + + Divide low single-precision floating-point value in xmm2 by low single-precision floating-point value in xmm3/m32. + + + VDIVSS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.NDS.LIG.F3.0F.W0 5E /r + + AVX512F + + Divide low single-precision floating-point value in xmm2 by low single-precision floating-point value in xmm3/m32. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VCOMPRESSPD--Store Sparse Packed Double-Precision Floating-Point Values into Dense Memory. + + VCOMPRESSPD + xmm1/m128 {k1}{z},xmm2 + EVEX.128.66.0F38.W1 8A /r + + AVX512VL + AVX512F + + Compress packed double-precision floating-point values from xmm2 to xmm1/m128 using writemask k1. + + + VCOMPRESSPD + ymm1/m256 {k1}{z},ymm2 + EVEX.256.66.0F38.W1 8A /r + + AVX512VL + AVX512F + + Compress packed double-precision floating-point values from ymm2 to ymm1/m256 using writemask k1. + + + VCOMPRESSPD + zmm1/m512 {k1}{z},zmm2 + EVEX.512.66.0F38.W1 8A /r + + AVX512F + + Compress packed double-precision floating-point values from zmm2 using control mask k1 to zmm1/m512. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VCOMPRESSPS--Store Sparse Packed Single-Precision Floating-Point Values into Dense Memory. 
+ + VCOMPRESSPS + xmm1/m128 {k1}{z},xmm2 + EVEX.128.66.0F38.W0 8A /r + + AVX512VL + AVX512F + + Compress packed single-precision floating-point values from xmm2 to xmm1/m128 using writemask k1. + + + VCOMPRESSPS + ymm1/m256 {k1}{z},ymm2 + EVEX.256.66.0F38.W0 8A /r + + AVX512VL + AVX512F + + Compress packed single-precision floating-point values from ymm2 to ymm1/m256 using writemask k1. + + + VCOMPRESSPS + zmm1/m512 {k1}{z},zmm2 + EVEX.512.66.0F38.W0 8A /r + + AVX512F + + Compress packed single-precision floating-point values from zmm2 using control mask k1 to zmm1/m512. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + CVTDQ2PD--Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values. + + CVTDQ2PD + xmm1,xmm2/m64 + F3 0F E6 /r + + SSE2 + + Convert two packed signed doubleword integers from xmm2/mem to two packed double-precision floatingpoint values in xmm1. + + + VCVTDQ2PD + xmm1,xmm2/m64 + VEX.128.F3.0F.WIG E6 /r + + AVX + + Convert two packed signed doubleword integers from xmm2/mem to two packed double-precision floatingpoint values in xmm1. + + + VCVTDQ2PD + ymm1,xmm2/m128 + VEX.256.F3.0F.WIG E6 /r + + AVX + + Convert four packed signed doubleword integers from xmm2/mem to four packed double-precision floatingpoint values in ymm1. + + + VCVTDQ2PD + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.F3.0F.W0 E6 /r + + AVX512VL + AVX512F + + Convert 2 packed signed doubleword integers from xmm2/m128/m32bcst to eight packed double-precision floating-point values in xmm1 with writemask k1. + + + VCVTDQ2PD + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.F3.0F.W0 E6 /r + + AVX512VL + AVX512F + + Convert 4 packed signed doubleword integers from xmm2/m128/m32bcst to 4 packed double-precision floating-point values in ymm1 with writemask k1. + + + VCVTDQ2PD + zmm1 {k1}{z},ymm2/m256/m32bcst + EVEX.512.F3.0F.W0 E6 /r + + AVX512F + + Convert eight packed signed doubleword integers from ymm2/m256/m32bcst to eight packed double-precision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTDQ2PS--Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values. + + CVTDQ2PS + xmm1,xmm2/m128 + 0F 5B /r + + SSE2 + + Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision floatingpoint values in xmm1. + + + VCVTDQ2PS + xmm1,xmm2/m128 + VEX.128.0F.WIG 5B /r + + AVX + + Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision floatingpoint values in xmm1. + + + VCVTDQ2PS + ymm1,ymm2/m256 + VEX.256.0F.WIG 5B /r + + AVX + + Convert eight packed signed doubleword integers from ymm2/mem to eight packed single-precision floatingpoint values in ymm1. + + + VCVTDQ2PS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert four packed signed doubleword integers from xmm2/m128/m32bcst to four packed single-precision floating-point values in xmm1with writemask k1. + + + VCVTDQ2PS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert eight packed signed doubleword integers from ymm2/m256/m32bcst to eight packed single-precision floating-point values in ymm1with writemask k1. + + + VCVTDQ2PS + zmm1 {k1}{z},zmm2/m512/m32bcst{er} + EVEX.512.0F.W0 5B /r + + AVX512F + + Convert sixteen packed signed doubleword integers from zmm2/m512/m32bcst to sixteen packed singleprecision floating-point values in zmm1with writemask k1. 
+ + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPD2DQ--Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers. + + CVTPD2DQ + xmm1,xmm2/m128 + F2 0F E6 /r + + SSE2 + + Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1. + + + VCVTPD2DQ + xmm1,xmm2/m128 + VEX.128.F2.0F.WIG E6 /r + + AVX + + Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1. + + + VCVTPD2DQ + xmm1,ymm2/m256 + VEX.256.F2.0F.WIG E6 /r + + AVX + + Convert four packed double-precision floating-point values in ymm2/mem to four signed doubleword integers in xmm1. + + + VCVTPD2DQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.F2.0F.W1 E6 /r + + AVX512VL + AVX512F + + Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two signed doubleword integers in xmm1 subject to writemask k1. + + + VCVTPD2DQ + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.F2.0F.W1 E6 /r + + AVX512VL + AVX512F + + Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four signed doubleword integers in xmm1 subject to writemask k1. + + + VCVTPD2DQ + ymm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.F2.0F.W1 E6 /r + + AVX512F + + Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight signed doubleword integers in ymm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPD2PS--Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values. + + CVTPD2PS + xmm1,xmm2/m128 + 66 0F 5A /r + + SSE2 + + Convert two packed double-precision floating-point values in xmm2/mem to two single-precision floating-point values in xmm1. + + + VCVTPD2PS + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 5A /r + + AVX + + Convert two packed double-precision floating-point values in xmm2/mem to two single-precision floating-point values in xmm1. + + + VCVTPD2PS + xmm1,ymm2/m256 + VEX.256.66.0F.WIG 5A /r + + AVX + + Convert four packed double-precision floating-point values in ymm2/mem to four single-precision floating-point values in xmm1. + + + VCVTPD2PS + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 5A /r + + AVX512VL + AVX512F + + Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two singleprecision floating-point values in xmm1with writemask k1. + + + VCVTPD2PS + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 5A /r + + AVX512VL + AVX512F + + Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four singleprecision floating-point values in xmm1with writemask k1. + + + VCVTPD2PS + ymm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.66.0F.W1 5A /r + + AVX512F + + Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight singleprecision floating-point values in ymm1with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPD2QQ--Convert Packed Double-Precision Floating-Point Values to Packed Quadword Integers. + + VCVTPD2QQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 7B /r + + AVX512VL + AVX512DQ + + Convert two packed double-precision floating-point values from xmm2/m128/m64bcst to two packed quadword integers in xmm1 with writemask k1. 
+ + + VCVTPD2QQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 7B /r + + AVX512VL + AVX512DQ + + Convert four packed double-precision floating-point values from ymm2/m256/m64bcst to four packed quadword integers in ymm1 with writemask k1. + + + VCVTPD2QQ + zmm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.66.0F.W1 7B /r + + AVX512DQ + + Convert eight packed double-precision floating-point values from zmm2/m512/m64bcst to eight packed quadword integers in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPD2UDQ--Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers. + + VCVTPD2UDQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.0F.W1 79 /r + + AVX512VL + AVX512F + + Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two unsigned doubleword integers in xmm1 subject to writemask k1. + + + VCVTPD2UDQ + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.0F.W1 79 /r + + AVX512VL + AVX512F + + Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four unsigned doubleword integers in xmm1 subject to writemask k1. + + + VCVTPD2UDQ + ymm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.0F.W1 79 /r + + AVX512F + + Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight unsigned doubleword integers in ymm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPD2UQQ--Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers. + + VCVTPD2UQQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 79 /r + + AVX512VL + AVX512DQ + + Convert two packed double-precision floating-point values from xmm2/mem to two packed unsigned quadword integers in xmm1 with writemask k1. + + + VCVTPD2UQQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 79 /r + + AVX512VL + AVX512DQ + + Convert fourth packed double-precision floating-point values from ymm2/mem to four packed unsigned quadword integers in ymm1 with writemask k1. + + + VCVTPD2UQQ + zmm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.66.0F.W1 79 /r + + AVX512DQ + + Convert eight packed double-precision floating-point values from zmm2/mem to eight packed unsigned quadword integers in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPH2PS--Convert 16-bit FP values to Single-Precision FP values. + + VCVTPH2PS + xmm1,xmm2/m64 + VEX.128.66.0F38.W0 1313 /r + + F16C + + Convert four packed half precision (16-bit) floatingpoint values in xmm2/m64 to packed single-precision floating-point value in xmm1. + + + VCVTPH2PS + ymm1,xmm2/m128 + VEX.256.66.0F38.W0 1313 /r + + F16C + + Convert eight packed half precision (16-bit) floatingpoint values in xmm2/m128 to packed singleprecision floating-point value in ymm1. + + + VCVTPH2PS + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.W0 1313 /r + + AVX512VL + AVX512F + + Convert four packed half precision (16-bit) floatingpoint values in xmm2/m64 to packed single-precision floating-point values in xmm1. + + + VCVTPH2PS + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.W0 1313 /r + + AVX512VL + AVX512F + + Convert eight packed half precision (16-bit) floatingpoint values in xmm2/m128 to packed singleprecision floating-point values in ymm1. + + + VCVTPH2PS + zmm1 {k1}{z},ymm2/m256 {sae} + EVEX.512.66.0F38.W0 1313 /r + + AVX512F + + Convert sixteen packed half precision (16-bit) floating-point values in ymm2/m256 to packed single-precision floating-point values in zmm1. 
+ + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPS2PH--Convert Single-Precision FP value to 16-bit FP value. + + VCVTPS2PH + xmm1/m64,xmm2,imm8 + VEX.128.66.0F3A.W0 1D 1D/r ib + + F16C + + Convert four packed single-precision floating-point values in xmm2 to packed half-precision (16-bit) floating-point values in xmm1/m64. Imm8 provides rounding controls. + + + VCVTPS2PH + xmm1/m128,ymm2,imm8 + VEX.256.66.0F3A.W0 1D1D /r ib + + F16C + + Convert eight packed single-precision floating-point values in ymm2 to packed half-precision (16-bit) floating-point values in xmm1/m128. Imm8 provides rounding controls. + + + VCVTPS2PH + xmm1/m128 {k1}{z},xmm2,imm8 + EVEX.128.66.0F3A.W0 1D1D /r ib + + AVX512VL + AVX512F + + Convert four packed single-precision floating-point values in xmm2 to packed half-precision (16-bit) floating-point values in xmm1/m128. Imm8 provides rounding controls. + + + VCVTPS2PH + xmm1/m256 {k1}{z},ymm2,imm8 + EVEX.256.66.0F3A.W0 1D1D /r ib + + AVX512VL + AVX512F + + Convert eight packed single-precision floating-point values in ymm2 to packed half-precision (16-bit) floating-point values in xmm1/m256. Imm8 provides rounding controls. + + + VCVTPS2PH + ymm1/m256 {k1}{z},zmm2{sae},imm8 + EVEX.512.66.0F3A.W0 1D1D /r ib + + AVX512F + + Convert sixteen packed single-precision floating-point values in zmm2 to packed half-precision (16-bit) floatingpoint values in ymm1/m256. Imm8 provides rounding controls. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + CVTPS2DQ--Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values. + + CVTPS2DQ + xmm1,xmm2/m128 + 66 0F 5B /r + + SSE2 + + Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1. + + + VCVTPS2DQ + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 5B /r + + AVX + + Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1. + + + VCVTPS2DQ + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 5B /r + + AVX + + Convert eight packed single-precision floating-point values from ymm2/mem to eight packed signed doubleword values in ymm1. + + + VCVTPS2DQ + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed doubleword values in xmm1 subject to writemask k1. + + + VCVTPS2DQ + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed doubleword values in ymm1 subject to writemask k1. + + + VCVTPS2DQ + zmm1 {k1}{z},zmm2/m512/m32bcst{er} + EVEX.512.66.0F.W0 5B /r + + AVX512F + + Convert sixteen packed single-precision floating-point values from zmm2/m512/m32bcst to sixteen packed signed doubleword values in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPS2UDQ--Convert Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values. + + VCVTPS2UDQ + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.0F.W0 79 /r + + AVX512VL + AVX512F + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned doubleword values in xmm1 subject to writemask k1. 
+ + + VCVTPS2UDQ + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.0F.W0 79 /r + + AVX512VL + AVX512F + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned doubleword values in ymm1 subject to writemask k1. + + + VCVTPS2UDQ + zmm1 {k1}{z},zmm2/m512/m32bcst{er} + EVEX.512.0F.W0 79 /r + + AVX512F + + Convert sixteen packed single-precision floating-point values from zmm2/m512/m32bcst to sixteen packed unsigned doubleword values in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPS2QQ--Convert Packed Single Precision Floating-Point Values to Packed Singed Quadword Integer Values. + + VCVTPS2QQ + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.66.0F.W0 7B /r + + AVX512VL + AVX512DQ + + Convert two packed single precision floating-point values from xmm2/m64/m32bcst to two packed signed quadword values in xmm1 subject to writemask k1. + + + VCVTPS2QQ + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.66.0F.W0 7B /r + + AVX512VL + AVX512DQ + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed quadword values in ymm1 subject to writemask k1. + + + VCVTPS2QQ + zmm1 {k1}{z},ymm2/m256/m32bcst{er} + EVEX.512.66.0F.W0 7B /r + + AVX512DQ + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed quadword values in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPS2UQQ--Convert Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values. + + VCVTPS2UQQ + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.66.0F.W0 79 /r + + AVX512VL + AVX512DQ + + Convert two packed single precision floating-point values from zmm2/m64/m32bcst to two packed unsigned quadword values in zmm1 subject to writemask k1. + + + VCVTPS2UQQ + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.66.0F.W0 79 /r + + AVX512VL + AVX512DQ + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned quadword values in ymm1 subject to writemask k1. + + + VCVTPS2UQQ + zmm1 {k1}{z},ymm2/m256/m32bcst{er} + EVEX.512.66.0F.W0 79 /r + + AVX512DQ + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned quadword values in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPS2PD--Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values. + + CVTPS2PD + xmm1,xmm2/m64 + 0F 5A /r + + SSE2 + + Convert two packed single-precision floating-point values in xmm2/m64 to two packed double-precision floating-point values in xmm1. + + + VCVTPS2PD + xmm1,xmm2/m64 + VEX.128.0F.WIG 5A /r + + AVX + + Convert two packed single-precision floating-point values in xmm2/m64 to two packed double-precision floating-point values in xmm1. + + + VCVTPS2PD + ymm1,xmm2/m128 + VEX.256.0F.WIG 5A /r + + AVX + + Convert four packed single-precision floating-point values in xmm2/m128 to four packed double-precision floatingpoint values in ymm1. + + + VCVTPS2PD + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.0F.W0 5A /r + + AVX512VL + AVX512F + + Convert two packed single-precision floating-point values in xmm2/m64/m32bcst to packed double-precision floatingpoint values in xmm1 with writemask k1. 
+ + + VCVTPS2PD + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.0F.W0 5A /r + + AVX512VL + + Convert four packed single-precision floating-point values in xmm2/m128/m32bcst to packed double-precision floating-point values in ymm1 with writemask k1. + + + VCVTPS2PD + zmm1 {k1}{z},ymm2/m256/m32bcst{sae} + EVEX.512.0F.W0 5A /r + + AVX512F + + Convert eight packed single-precision floating-point values in ymm2/m256/b32bcst to eight packed double-precision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTQQ2PD--Convert Packed Quadword Integers to Packed Double-Precision Floating-Point Values. + + VCVTQQ2PD + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.F3.0F.W1 E6 /r + + AVX512VL + AVX512DQ + + Convert two packed quadword integers from xmm2/m128/m64bcst to packed double-precision floatingpoint values in xmm1 with writemask k1. + + + VCVTQQ2PD + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.F3.0F.W1 E6 /r + + AVX512VL + AVX512DQ + + Convert four packed quadword integers from ymm2/m256/m64bcst to packed double-precision floatingpoint values in ymm1 with writemask k1. + + + VCVTQQ2PD + zmm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.F3.0F.W1 E6 /r + + AVX512DQ + + Convert eight packed quadword integers from zmm2/m512/m64bcst to eight packed double-precision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTQQ2PS--Convert Packed Quadword Integers to Packed Single-Precision Floating-Point Values. + + VCVTQQ2PS + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.0F.W1 5B /r + + AVX512VL + AVX512DQ + + Convert two packed quadword integers from xmm2/mem to packed single-precision floating-point values in xmm1 with writemask k1. + + + VCVTQQ2PS + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.0F.W1 5B /r + + AVX512VL + AVX512DQ + + Convert four packed quadword integers from ymm2/mem to packed single-precision floating-point values in xmm1 with writemask k1. + + + VCVTQQ2PS + ymm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.0F.W1 5B /r + + AVX512DQ + + Convert eight packed quadword integers from zmm2/mem to eight packed single-precision floating-point values in ymm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTSD2SI--Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer. + + CVTSD2SI + r32,xmm1/m64 + F2 0F 2D /r + + SSE2 + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer r32. + + + CVTSD2SI + r64,xmm1/m64 + F2 REX.W 0F 2D /r + + SSE2 + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer signextended into r64. + + + VCVTSD2SI + r32,xmm1/m64 + VEX.128.F2.0F.W0 2D /r + + AVX + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer r32. + + + VCVTSD2SI + r64,xmm1/m64 + VEX.128.F2.0F.W1 2D /r + + AVX + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer signextended into r64. + + + VCVTSD2SI + r32,xmm1/m64{er} + EVEX.LIG.F2.0F.W0 2D /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer r32. + + + VCVTSD2SI + r64,xmm1/m64{er} + EVEX.LIG.F2.0F.W1 2D /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer signextended into r64. 
+ + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTSD2USI--Convert Scalar Double-Precision Floating-Point Value to Unsigned Doubleword Integer. + + VCVTSD2USI + r32,xmm1/m64{er} + EVEX.LIG.F2.0F.W0 79 /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one unsigned doubleword integer r32. + + + VCVTSD2USI + r64,xmm1/m64{er} + EVEX.LIG.F2.0F.W1 79 /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one unsigned quadword integer zeroextended into r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTSD2SS--Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value. + + CVTSD2SS + xmm1,xmm2/m64 + F2 0F 5A /r + + SSE2 + + Convert one double-precision floating-point value in xmm2/m64 to one single-precision floating-point value in xmm1. + + + VCVTSD2SS + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 5A /r + + AVX + + Convert one double-precision floating-point value in xmm3/m64 to one single-precision floating-point value and merge with high bits in xmm2. + + + VCVTSD2SS + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.F2.0F.W1 5A /r + + AVX512F + + Convert one double-precision floating-point value in xmm3/m64 to one single-precision floating-point value and merge with high bits in xmm2 under writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + CVTSI2SD--Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value. + + CVTSI2SD + xmm1,r32/m32 + F2 0F 2A /r + + SSE2 + + Convert one signed doubleword integer from r32/m32 to one double-precision floating-point value in xmm1. + + + CVTSI2SD + xmm1,r/m64 + F2 REX.W 0F 2A /r + + SSE2 + + Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm1. + + + VCVTSI2SD + xmm1,xmm2,r/m32 + VEX.NDS.128.F2.0F.W0 2A /r + + AVX + + Convert one signed doubleword integer from r/m32 to one double-precision floating-point value in xmm1. + + + VCVTSI2SD + xmm1,xmm2,r/m64 + VEX.NDS.128.F2.0F.W1 2A /r + + AVX + + Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm1. + + + VCVTSI2SD + xmm1,xmm2,r/m32 + EVEX.NDS.LIG.F2.0F.W0 2A /r + + AVX512F + + Convert one signed doubleword integer from r/m32 to one double-precision floating-point value in xmm1. + + + VCVTSI2SD + xmm1,xmm2,r/m64{er} + EVEX.NDS.LIG.F2.0F.W1 2A /r + + AVX512F + + Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + CVTSI2SS--Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value. + + CVTSI2SS + xmm1,r/m32 + F3 0F 2A /r + + SSE + + Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1. + + + CVTSI2SS + xmm1,r/m64 + F3 REX.W 0F 2A /r + + SSE + + Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1. + + + VCVTSI2SS + xmm1,xmm2,r/m32 + VEX.NDS.128.F3.0F.W0 2A /r + + AVX + + Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1. 
+ + + VCVTSI2SS + xmm1,xmm2,r/m64 + VEX.NDS.128.F3.0F.W1 2A /r + + AVX + + Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1. + + + VCVTSI2SS + xmm1,xmm2,r/m32{er} + EVEX.NDS.LIG.F3.0F.W0 2A /r + + AVX512F + + Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1. + + + VCVTSI2SS + xmm1,xmm2,r/m64{er} + EVEX.NDS.LIG.F3.0F.W1 2A /r + + AVX512F + + Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + CVTSS2SD--Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value. + + CVTSS2SD + xmm1,xmm2/m32 + F3 0F 5A /r + + SSE2 + + Convert one single-precision floating-point value in xmm2/m32 to one double-precision floating-point value in xmm1. + + + VCVTSS2SD + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 5A /r + + AVX + + Convert one single-precision floating-point value in xmm3/m32 to one double-precision floating-point value and merge with high bits of xmm2. + + + VCVTSS2SD + xmm1 {k1}{z},xmm2,xmm3/m32{sae} + EVEX.NDS.LIG.F3.0F.W0 5A /r + + AVX512F + + Convert one single-precision floating-point value in xmm3/m32 to one double-precision floating-point value and merge with high bits of xmm2 under writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + CVTSS2SI--Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer. + + CVTSS2SI + r32,xmm1/m32 + F3 0F 2D /r + + SSE + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32. + + + CVTSS2SI + r64,xmm1/m32 + F3 REX.W 0F 2D /r + + SSE + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64. + + + VCVTSS2SI + r32,xmm1/m32 + VEX.128.F3.0F.W0 2D /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32. + + + VCVTSS2SI + r64,xmm1/m32 + VEX.128.F3.0F.W1 2D /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64. + + + VCVTSS2SI + r32,xmm1/m32{er} + EVEX.LIG.F3.0F.W0 2D /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32. + + + VCVTSS2SI + r64,xmm1/m32{er} + EVEX.LIG.F3.0F.W1 2D /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTSS2USI--Convert Scalar Single-Precision Floating-Point Value to Unsigned Doubleword Integer. + + VCVTSS2USI + r32,xmm1/m32{er} + EVEX.LIG.F3.0F.W0 79 /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one unsigned doubleword integer in r32. + + + VCVTSS2USI + r64,xmm1/m32{er} + EVEX.LIG.F3.0F.W1 79 /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one unsigned quadword integer in r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTPD2DQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers. 
+ + CVTTPD2DQ + xmm1,xmm2/m128 + 66 0F E6 /r + + SSE2 + + Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1 using truncation. + + + VCVTTPD2DQ + xmm1,xmm2/m128 + VEX.128.66.0F.WIG E6 /r + + AVX + + Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1 using truncation. + + + VCVTTPD2DQ + xmm1,ymm2/m256 + VEX.256.66.0F.WIG E6 /r + + AVX + + Convert four packed double-precision floating-point values in ymm2/mem to four signed doubleword integers in xmm1 using truncation. + + + VCVTTPD2DQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 E6 /r + + AVX512VL + AVX512F + + Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two signed doubleword integers in xmm1 using truncation subject to writemask k1. + + + VCVTTPD2DQ + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 E6 /r + + AVX512VL + AVX512F + + Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four signed doubleword integers in xmm1 using truncation subject to writemask k1. + + + VCVTTPD2DQ + ymm1 {k1}{z},zmm2/m512/m64bcst{sae} + EVEX.512.66.0F.W1 E6 /r + + AVX512F + + Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight signed doubleword integers in ymm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPD2QQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Quadword Integers. + + VCVTTPD2QQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert two packed double-precision floating-point values from zmm2/m128/m64bcst to two packed quadword integers in zmm1 using truncation with writemask k1. + + + VCVTTPD2QQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert four packed double-precision floating-point values from ymm2/m256/m64bcst to four packed quadword integers in ymm1 using truncation with writemask k1. + + + VCVTTPD2QQ + zmm1 {k1}{z},zmm2/m512/m64bcst{sae} + EVEX.512.66.0F.W1 7A /r + + AVX512DQ + + Convert eight packed double-precision floating-point values from zmm2/m512 to eight packed quadword integers in zmm1 using truncation with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPD2UDQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers. + + VCVTTPD2UDQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.0F.W1 78 /r + + AVX512VL + AVX512F + + Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two unsigned doubleword integers in xmm1 using truncation subject to writemask k1. + + + VCVTTPD2UDQ + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.0F.W1 78 02 /r + + AVX512VL + AVX512F + + Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four unsigned doubleword integers in xmm1 using truncation subject to writemask k1. + + + VCVTTPD2UDQ + ymm1 {k1}{z},zmm2/m512/m64bcst{sae} + EVEX.512.0F.W1 78 /r + + AVX512F + + Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight unsigned doubleword integers in ymm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPD2UQQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers. 
+ + VCVTTPD2UQQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 78 /r + + AVX512VL + AVX512DQ + + Convert two packed double-precision floating-point values from xmm2/m128/m64bcst to two packed unsigned quadword integers in xmm1 using truncation with writemask k1. + + + VCVTTPD2UQQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 78 /r + + AVX512VL + AVX512DQ + + Convert four packed double-precision floating-point values from ymm2/m256/m64bcst to four packed unsigned quadword integers in ymm1 using truncation with writemask k1. + + + VCVTTPD2UQQ + zmm1 {k1}{z},zmm2/m512/m64bcst{sae} + EVEX.512.66.0F.W1 78 /r + + AVX512DQ + + Convert eight packed double-precision floating-point values from zmm2/mem to eight packed unsigned quadword integers in zmm1 using truncation with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTPS2DQ--Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values. + + CVTTPS2DQ + xmm1,xmm2/m128 + F3 0F 5B /r + + SSE2 + + Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1 using truncation. + + + VCVTTPS2DQ + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 5B /r + + AVX + + Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1 using truncation. + + + VCVTTPS2DQ + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 5B /r + + AVX + + Convert eight packed single-precision floating-point values from ymm2/mem to eight packed signed doubleword values in ymm1 using truncation. + + + VCVTTPS2DQ + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.F3.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed doubleword values in xmm1 using truncation subject to writemask k1. + + + VCVTTPS2DQ + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.F3.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed doubleword values in ymm1 using truncation subject to writemask k1. + + + VCVTTPS2DQ + zmm1 {k1}{z},zmm2/m512/m32bcst {sae} + EVEX.512.F3.0F.W0 5B /r + + AVX512F + + Convert sixteen packed single-precision floating-point values from zmm2/m512/m32bcst to sixteen packed signed doubleword values in zmm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPS2UDQ--Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values. + + VCVTTPS2UDQ + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.0F.W0 78 /r + + AVX512VL + AVX512F + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned doubleword values in xmm1 using truncation subject to writemask k1. + + + VCVTTPS2UDQ + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.0F.W0 78 /r + + AVX512VL + AVX512F + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned doubleword values in ymm1 using truncation subject to writemask k1. + + + VCVTTPS2UDQ + zmm1 {k1}{z},zmm2/m512/m32bcst{sae} + EVEX.512.0F.W0 78 /r + + AVX512F + + Convert sixteen packed single-precision floatingpoint values from zmm2/m512/m32bcst to sixteen packed unsigned doubleword values in zmm1 using truncation subject to writemask k1. 
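Throughout these entries the AVX-512 decorators are appended directly to the operand text: "{k1}{z}" for opmask and zeroing-masking, "/m32bcst" or "/m64bcst" for embedded broadcast, and "{er}"/"{sae}" for embedded rounding or suppress-all-exceptions. A purely illustrative way to pull those out of an operand string (helper names are assumptions, not taken from genc.py):

import re

MASK_RE  = re.compile(r"\{k[1-7]\}")       # opmask register
ZERO_RE  = re.compile(r"\{z\}")            # zeroing-masking
BCST_RE  = re.compile(r"/m(32|64)bcst")    # embedded broadcast element size
ROUND_RE = re.compile(r"\{er\}|\{sae\}")   # embedded rounding / SAE

def decorators(operands):
    bcst = BCST_RE.search(operands)
    rnd = ROUND_RE.search(operands)
    return {
        "opmask": MASK_RE.search(operands) is not None,
        "zeroing": ZERO_RE.search(operands) is not None,
        "broadcast": int(bcst.group(1)) if bcst else None,   # 32, 64 or None
        "rounding": rnd.group(0) if rnd else None,           # "{er}", "{sae}" or None
    }

if __name__ == "__main__":
    # e.g. the VCVTTPS2UDQ zmm form listed above
    print(decorators("zmm1 {k1}{z},zmm2/m512/m32bcst{sae}"))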
+ + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPS2QQ--Convert with Truncation Packed Single Precision Floating-Point Values to Packed Singed Quadword Integer Values. + + VCVTTPS2QQ + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.66.0F.W0 7A /r + + AVX512VL + AVX512DQ + + Convert two packed single precision floating-point values from xmm2/m64/m32bcst to two packed signed quadword values in xmm1 using truncation subject to writemask k1. + + + VCVTTPS2QQ + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.66.0F.W0 7A /r + + AVX512VL + AVX512DQ + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed quadword values in ymm1 using truncation subject to writemask k1. + + + VCVTTPS2QQ + zmm1 {k1}{z},ymm2/m256/m32bcst{sae} + EVEX.512.66.0F.W0 7A /r + + AVX512DQ + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed quadword values in zmm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPS2UQQ--Convert with Truncation Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values. + + VCVTTPS2UQQ + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.66.0F.W0 78 /r + + AVX512VL + AVX512DQ + + Convert two packed single precision floating-point values from zmm2/m64/m32bcst to two packed unsigned quadword values in zmm1 using truncation subject to writemask k1. + + + VCVTTPS2UQQ + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.66.0F.W0 78 /r + + AVX512VL + AVX512DQ + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned quadword values in ymm1 using truncation subject to writemask k1. + + + VCVTTPS2UQQ + zmm1 {k1}{z},ymm2/m256/m32bcst{sae} + EVEX.512.66.0F.W0 78 /r + + AVX512DQ + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned quadword values in zmm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTSD2SI--Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer. + + CVTTSD2SI + r32,xmm1/m64 + F2 0F 2C /r + + SSE2 + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer in r32 using truncation. + + + CVTTSD2SI + r64,xmm1/m64 + F2 REX.W 0F 2C /r + + SSE2 + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer in r64 using truncation. + + + VCVTTSD2SI + r32,xmm1/m64 + VEX.128.F2.0F.W0 2C /r + + AVX + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer in r32 using truncation. + + + VCVTTSD2SI + r64,xmm1/m64 + VEX.128.F2.0F.W1 2C /r + + AVX + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer in r64 using truncation. + + + VCVTTSD2SI + r32,xmm1/m64{sae} + EVEX.LIG.F2.0F.W0 2C /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer in r32 using truncation. + + + VCVTTSD2SI + r64,xmm1/m64{sae} + EVEX.LIG.F2.0F.W1 2C /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer in r64 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTSD2USI--Convert with Truncation Scalar Double-Precision Floating-Point Value to Unsigned Integer. 
+ + VCVTTSD2USI + r32,xmm1/m64{sae} + EVEX.LIG.F2.0F.W0 78 /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one unsigned doubleword integer r32 using truncation. + + + VCVTTSD2USI + r64,xmm1/m64{sae} + EVEX.LIG.F2.0F.W1 78 /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one unsigned quadword integer zeroextended into r64 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTSS2SI--Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer. + + CVTTSS2SI + r32,xmm1/m32 + F3 0F 2C /r + + SSE + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32 using truncation. + + + CVTTSS2SI + r64,xmm1/m32 + F3 REX.W 0F 2C /r + + SSE + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64 using truncation. + + + VCVTTSS2SI + r32,xmm1/m32 + VEX.128.F3.0F.W0 2C /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32 using truncation. + + + VCVTTSS2SI + r64,xmm1/m32 + VEX.128.F3.0F.W1 2C /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64 using truncation. + + + VCVTTSS2SI + r32,xmm1/m32{sae} + EVEX.LIG.F3.0F.W0 2C /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32 using truncation. + + + VCVTTSS2SI + r64,xmm1/m32{sae} + EVEX.LIG.F3.0F.W1 2C /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTSS2USI--Convert with Truncation Scalar Single-Precision Floating-Point Value to Unsigned Integer. + + VCVTTSS2USI + r32,xmm1/m32{sae} + EVEX.LIG.F3.0F.W0 78 /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one unsigned doubleword integer in r32 using truncation. + + + VCVTTSS2USI + r64,xmm1/m32{sae} + EVEX.LIG.F3.0F.W1 78 /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one unsigned quadword integer in r64 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTUDQ2PD--Convert Packed Unsigned Doubleword Integers to Packed Double-Precision Floating-Point Values. + + VCVTUDQ2PD + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.F3.0F.W0 7A /r + + AVX512VL + AVX512F + + Convert two packed unsigned doubleword integers from ymm2/m64/m32bcst to packed double-precision floating-point values in zmm1 with writemask k1. + + + VCVTUDQ2PD + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.F3.0F.W0 7A /r + + AVX512VL + AVX512F + + Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed doubleprecision floating-point values in zmm1 with writemask k1. + + + VCVTUDQ2PD + zmm1 {k1}{z},ymm2/m256/m32bcst + EVEX.512.F3.0F.W0 7A /r + + AVX512F + + Convert eight packed unsigned doubleword integers from ymm2/m256/m32bcst to eight packed doubleprecision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTUDQ2PS--Convert Packed Unsigned Doubleword Integers to Packed Single-Precision Floating-Point Values. 
+ + VCVTUDQ2PS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.F2.0F.W0 7A /r + + AVX512VL + AVX512F + + Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed single-precision floating-point values in xmm1 with writemask k1. + + + VCVTUDQ2PS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.F2.0F.W0 7A /r + + AVX512VL + AVX512F + + Convert eight packed unsigned doubleword integers from ymm2/m256/m32bcst to packed single-precision floating-point values in zmm1 with writemask k1. + + + VCVTUDQ2PS + zmm1 {k1}{z},zmm2/m512/m32bcst{er} + EVEX.512.F2.0F.W0 7A /r + + AVX512F + + Convert sixteen packed unsigned doubleword integers from zmm2/m512/m32bcst to sixteen packed singleprecision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTUQQ2PD--Convert Packed Unsigned Quadword Integers to Packed Double-Precision Floating-Point Values. + + VCVTUQQ2PD + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.F3.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert two packed unsigned quadword integers from xmm2/m128/m64bcst to two packed double-precision floating-point values in xmm1 with writemask k1. + + + VCVTUQQ2PD + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.F3.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert four packed unsigned quadword integers from ymm2/m256/m64bcst to packed double-precision floatingpoint values in ymm1 with writemask k1. + + + VCVTUQQ2PD + zmm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.F3.0F.W1 7A /r + + AVX512DQ + + Convert eight packed unsigned quadword integers from zmm2/m512/m64bcst to eight packed double-precision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTUQQ2PS--Convert Packed Unsigned Quadword Integers to Packed Single-Precision Floating-Point Values. + + VCVTUQQ2PS + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.F2.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert two packed unsigned quadword integers from xmm2/m128/m64bcst to packed single-precision floatingpoint values in zmm1 with writemask k1. + + + VCVTUQQ2PS + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.F2.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert four packed unsigned quadword integers from ymm2/m256/m64bcst to packed single-precision floatingpoint values in xmm1 with writemask k1. + + + VCVTUQQ2PS + ymm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.F2.0F.W1 7A /r + + AVX512DQ + + Convert eight packed unsigned quadword integers from zmm2/m512/m64bcst to eight packed single-precision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTUSI2SD--Convert Unsigned Integer to Scalar Double-Precision Floating-Point Value. + + VCVTUSI2SD + xmm1,xmm2,r/m32 + EVEX.NDS.LIG.F2.0F.W0 7B /r + + AVX512F + + Convert one unsigned doubleword integer from r/m32 to one double-precision floating-point value in xmm1. + + + VCVTUSI2SD + xmm1,xmm2,r/m64{er} + EVEX.NDS.LIG.F2.0F.W1 7B /r + + AVX512F + + Convert one unsigned quadword integer from r/m64 to one double-precision floating-point value in xmm1. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VCVTUSI2SS--Convert Unsigned Integer to Scalar Single-Precision Floating-Point Value. + + VCVTUSI2SS + xmm1,xmm2,r/m32{er} + EVEX.NDS.LIG.F3.0F.W0 7B /r + + AVX512F + + Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1. 
+ + + VCVTUSI2SS + xmm1,xmm2,r/m64{er} + EVEX.NDS.LIG.F3.0F.W1 7B /r + + AVX512F + + Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VDBPSADBW--Double Block Packed Sum-Absolute-Differences (SAD) on Unsigned Bytes. + + VDBPSADBW + xmm1 {k1}{z},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.W0 42 /r ib + + AVX512VL + AVX512BW + + Compute packed SAD word results of unsigned bytes in dword block from xmm2 with unsigned bytes of dword blocks transformed from xmm3/m128 using the shuffle controls in imm8. Results are written to xmm1 under the writemask k1. + + + VDBPSADBW + ymm1 {k1}{z},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.W0 42 /r ib + + AVX512VL + AVX512BW + + Compute packed SAD word results of unsigned bytes in dword block from ymm2 with unsigned bytes of dword blocks transformed from ymm3/m256 using the shuffle controls in imm8. Results are written to ymm1 under the writemask k1. + + + VDBPSADBW + zmm1 {k1}{z},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.W0 42 /r ib + + AVX512BW + + Compute packed SAD word results of unsigned bytes in dword block from zmm2 with unsigned bytes of dword blocks transformed from zmm3/m512 using the shuffle controls in imm8. Results are written to zmm1 under the writemask k1. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VEXPANDPD--Load Sparse Packed Double-Precision Floating-Point Values from Dense Memory. + + VEXPANDPD + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F38.W1 88 /r + + AVX512VL + AVX512F + + Expand packed double-precision floating-point values from xmm2/m128 to xmm1 using writemask k1. + + + VEXPANDPD + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F38.W1 88 /r + + AVX512VL + AVX512F + + Expand packed double-precision floating-point values from ymm2/m256 to ymm1 using writemask k1. + + + VEXPANDPD + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F38.W1 88 /r + + AVX512F + + Expand packed double-precision floating-point values from zmm2/m512 to zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VEXPANDPS--Load Sparse Packed Single-Precision Floating-Point Values from Dense Memory. + + VEXPANDPS + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F38.W0 88 /r + + AVX512VL + AVX512F + + Expand packed single-precision floating-point values from xmm2/m128 to xmm1 using writemask k1. + + + VEXPANDPS + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F38.W0 88 /r + + AVX512VL + AVX512F + + Expand packed single-precision floating-point values from ymm2/m256 to ymm1 using writemask k1. + + + VEXPANDPS + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F38.W0 88 /r + + AVX512F + + Expand packed single-precision floating-point values from zmm2/m512 to zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VEXTRACTF128/VEXTRACTF32x4/VEXTRACTF64x2/VEXTRACTF32x8/VEXTRACTF64x4--Extr act Packed Floating-Point Values. + + VEXTRACTF128 + xmm1/m128,ymm2,imm8 + VEX.256.66.0F3A.W0 19 /r ib + + AVX + + Extract 128 bits of packed floating-point values from ymm2 and store results in xmm1/m128. + + + VEXTRACTF32X4 + xmm1/m128 {k1}{z},ymm2,imm8 + EVEX.256.66.0F3A.W0 19 /r ib + + AVX512VL + AVX512F + + Extract 128 bits of packed single-precision floatingpoint values from ymm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTF32x4 + xmm1/m128 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W0 19 /r ib + + AVX512F + + Extract 128 bits of packed single-precision floatingpoint values from zmm2 and store results in xmm1/m128 subject to writemask k1. 
+ + + VEXTRACTF64X2 + xmm1/m128 {k1}{z},ymm2,imm8 + EVEX.256.66.0F3A.W1 19 /r ib + + AVX512VL + AVX512DQ + + Extract 128 bits of packed double-precision floating-point values from ymm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTF64X2 + xmm1/m128 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W1 19 /r ib + + AVX512DQ + + Extract 128 bits of packed double-precision floating-point values from zmm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTF32X8 + ymm1/m256 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W0 1B /r ib + + AVX512DQ + + Extract 256 bits of packed single-precision floatingpoint values from zmm2 and store results in ymm1/m256 subject to writemask k1. + + + VEXTRACTF64x4 + ymm1/m256 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W1 1B /r ib + + AVX512F + + Extract 256 bits of packed double-precision floating-point values from zmm2 and store results in ymm1/m256 subject to writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + VEXTRACTI128/VEXTRACTI32x4/VEXTRACTI64x2/VEXTRACTI32x8/VEXTRACTI64x4--Extract packed Integer Values. + + VEXTRACTI128 + xmm1/m128,ymm2,imm8 + VEX.256.66.0F3A.W0 39 /r ib + + AVX2 + + Extract 128 bits of integer data from ymm2 and store results in xmm1/m128. + + + VEXTRACTI32X4 + xmm1/m128 {k1}{z},ymm2,imm8 + EVEX.256.66.0F3A.W0 39 /r ib + + AVX512VL + AVX512F + + Extract 128 bits of double-word integer values from ymm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTI32x4 + xmm1/m128 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W0 39 /r ib + + AVX512F + + Extract 128 bits of double-word integer values from zmm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTI64X2 + xmm1/m128 {k1}{z},ymm2,imm8 + EVEX.256.66.0F3A.W1 39 /r ib + + AVX512VL + AVX512DQ + + Extract 128 bits of quad-word integer values from ymm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTI64X2 + xmm1/m128 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W1 39 /r ib + + AVX512DQ + + Extract 128 bits of quad-word integer values from zmm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTI32X8 + ymm1/m256 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W0 3B /r ib + + AVX512DQ + + Extract 256 bits of double-word integer values from zmm2 and store results in ymm1/m256 subject to writemask k1. + + + VEXTRACTI64x4 + ymm1/m256 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W1 3B /r ib + + AVX512F + + Extract 256 bits of quad-word integer values from zmm2 and store results in ymm1/m256 subject to writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + EXTRACTPS--Extract Packed Floating-Point Values. + + EXTRACTPS + reg/m32,xmm1,imm8 + 66 0F 3A 17 /r ib + + SSE4_1 + + Extract one single-precision floating-point value from xmm1 at the offset specified by imm8 and store the result in reg or m32. Zero extend the results in 64-bit register if applicable. + + + VEXTRACTPS + reg/m32,xmm1,imm8 + VEX.128.66.0F3A.WIG 17 /r ib + + AVX + + Extract one single-precision floating-point value from xmm1 at the offset specified by imm8 and store the result in reg or m32. Zero extend the results in 64-bit register if applicable. 
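The short rows such as "ModRM:r/m(w) ModRM:reg(r) Imm8 NA" that close each group above state where every operand is encoded and whether it is read or written. One possible in-memory shape for such a row, shown only as a sketch with invented type names:

from typing import NamedTuple, Optional

class OpSlot(NamedTuple):
    where: str             # "ModRM:reg", "ModRM:r/m", "VEX.vvvv", "EVEX.vvvv", "Imm8"
    access: Optional[str]  # "r", "w", "r,w", or None for immediates

# EXTRACTPS/VEXTRACTPS reg/m32, xmm1, imm8  ->  "ModRM:r/m(w)  ModRM:reg(r)  Imm8  NA"
VEXTRACTPS_ENC = (
    OpSlot("ModRM:r/m", "w"),   # destination reg/m32 comes from the ModRM r/m field
    OpSlot("ModRM:reg", "r"),   # source xmm register comes from the ModRM reg field
    OpSlot("Imm8", None),       # the element index is the trailing imm8 byte
)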
+ + + VEXTRACTPS + reg/m32,xmm1,imm8 + EVEX.128.66.0F3A.WIG 17 /r ib + + AVX512F + + Extract one single-precision floating-point value from xmm1 at the offset specified by imm8 and store the result in reg or m32. Zero extend the results in 64-bit register if applicable. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + VFIXUPIMMPD--Fix Up Special Packed Float64 Values. + + VFIXUPIMMPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F3A.W1 54 /r ib + + AVX512VL + AVX512F + + Fix up special numbers in float64 vector xmm1, float64 vector xmm2 and int64 vector xmm3/m128/m64bcst and store the result in xmm1, under writemask. + + + VFIXUPIMMPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 54 /r ib + + AVX512VL + AVX512F + + Fix up special numbers in float64 vector ymm1, float64 vector ymm2 and int64 vector ymm3/m256/m64bcst and store the result in ymm1, under writemask. + + + VFIXUPIMMPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae},imm8 + EVEX.NDS.512.66.0F3A.W1 54 /r ib + + AVX512F + + Fix up elements of float64 vector in zmm2 using int64 vector table in zmm3/m512/m64bcst, combine with preserved elements from zmm1, and store the result in zmm1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VFIXUPIMMPS--Fix Up Special Packed Float32 Values. + + VFIXUPIMMPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.66.0F3A.W0 54 /r + + AVX512VL + AVX512F + + Fix up special numbers in float32 vector xmm1, float32 vector xmm2 and int32 vector xmm3/m128/m32bcst and store the result in xmm1, under writemask. + + + VFIXUPIMMPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 54 /r + + AVX512VL + AVX512F + + Fix up special numbers in float32 vector ymm1, float32 vector ymm2 and int32 vector ymm3/m256/m32bcst and store the result in ymm1, under writemask. + + + VFIXUPIMMPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae},imm8 + EVEX.NDS.512.66.0F3A.W0 54 /r ib + + AVX512F + + Fix up elements of float32 vector in zmm2 using int32 vector table in zmm3/m512/m32bcst, combine with preserved elements from zmm1, and store the result in zmm1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VFIXUPIMMSD--Fix Up Special Scalar Float64 Value. + + VFIXUPIMMSD + xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W1 55 /r ib + + AVX512F + + Fix up a float64 number in the low quadword element of xmm2 using scalar int32 table in xmm3/m64 and store the result in xmm1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VFIXUPIMMSS--Fix Up Special Scalar Float32 Value. + + VFIXUPIMMSS + xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W0 55 /r ib + + AVX512F + + Fix up a float32 number in the low doubleword element in xmm2 using scalar int32 table in xmm3/m32 and store the result in xmm1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VFMADD132PD/VFMADD213PD/VFMADD231PD--Fused Multiply-Add of Packed Double-Precision Floating-Point Values. + + VFMADD132PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 98 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, add to xmm2 and put result in xmm1. + + + VFMADD213PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 A8 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, add to xmm3/mem and put result in xmm1. 
+ + + VFMADD231PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 B8 /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, add to xmm1 and put result in xmm1. + + + VFMADD132PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 98 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, add to ymm2 and put result in ymm1. + + + VFMADD213PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 A8 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, add to ymm3/mem and put result in ymm1. + + + VFMADD231PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 B8 /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, add to ymm1 and put result in ymm1. + + + VFMADD132PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 98 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, add to xmm2 and put result in xmm1. + + + VFMADD213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 A8 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, add to xmm3/m128/m64bcst and put result in xmm1. + + + VFMADD231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 B8 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, add to xmm1 and put result in xmm1. + + + VFMADD132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 98 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, add to ymm2 and put result in ymm1. + + + VFMADD213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 A8 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, add to ymm3/m256/m64bcst and put result in ymm1. + + + VFMADD231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 B8 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, add to ymm1 and put result in ymm1. + + + VFMADD132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 98 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, add to zmm2 and put result in zmm1. + + + VFMADD213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 A8 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm2, add to zmm3/m512/m64bcst and put result in zmm1. + + + VFMADD231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 B8 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, add to zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADD132PS/VFMADD213PS/VFMADD231PS--Fused Multiply-Add of Packed Single-Precision Floating-Point Values. + + VFMADD132PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 98 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, add to xmm2 and put result in xmm1. + + + VFMADD213PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 A8 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, add to xmm3/mem and put result in xmm1. 
+ + + VFMADD231PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 B8 /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, add to xmm1 and put result in xmm1. + + + VFMADD132PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 98 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, add to ymm2 and put result in ymm1. + + + VFMADD213PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 A8 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, add to ymm3/mem and put result in ymm1. + + + VFMADD231PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 B8 /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, add to ymm1 and put result in ymm1. + + + VFMADD132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 98 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, add to xmm2 and put result in xmm1. + + + VFMADD213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 A8 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, add to xmm3/m128/m32bcst and put result in xmm1. + + + VFMADD231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 B8 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, add to xmm1 and put result in xmm1. + + + VFMADD132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 98 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, add to ymm2 and put result in ymm1. + + + VFMADD213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 A8 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, add to ymm3/m256/m32bcst and put result in ymm1. + + + VFMADD231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 B8 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, add to ymm1 and put result in ymm1. + + + VFMADD132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 98 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, add to zmm2 and put result in zmm1. + + + VFMADD213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 A8 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, add to zmm3/m512/m32bcst and put result in zmm1. + + + VFMADD231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 B8 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, add to zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADD132SD/VFMADD213SD/VFMADD231SD--Fused Multiply-Add of Scalar Double-Precision Floating-Point Values. + + VFMADD132SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 99 /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, add to xmm2 and put result in xmm1. + + + VFMADD213SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 A9 /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, add to xmm3/m64 and put result in xmm1. 
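The 132/213/231 suffixes on the VFMADD* forms listed above differ only in which source operands are multiplied and which is added; the result is always written to operand 1, which is why the encoding rows mark it ModRM:reg(r,w). Reading the digits as operand positions in a*b + c matches the descriptions in the table, as in this tiny sanity-check sketch:

# Illustrative only: map each FMA form to the computation its description states.
FMA_FORMS = {
    "132": lambda op1, op2, op3: op1 * op3 + op2,   # multiply op1 by op3, add op2
    "213": lambda op1, op2, op3: op2 * op1 + op3,   # multiply op1 by op2, add op3
    "231": lambda op1, op2, op3: op2 * op3 + op1,   # multiply op2 by op3, add op1
}

assert FMA_FORMS["132"](2.0, 10.0, 3.0) == 2.0 * 3.0 + 10.0
assert FMA_FORMS["231"](10.0, 2.0, 3.0) == 2.0 * 3.0 + 10.0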
+ + + VFMADD231SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 B9 /r + + FMA + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, add to xmm1 and put result in xmm1. + + + VFMADD132SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 99 /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, add to xmm2 and put result in xmm1. + + + VFMADD213SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 A9 /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, add to xmm3/m64 and put result in xmm1. + + + VFMADD231SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 B9 /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, add to xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADD132SS/VFMADD213SS/VFMADD231SS--Fused Multiply-Add of Scalar Single-Precision Floating-Point Values. + + VFMADD132SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.128.66.0F38.W0 99 /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, add to xmm2 and put result in xmm1. + + + VFMADD213SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.128.66.0F38.W0 A9 /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, add to xmm3/m32 and put result in xmm1. + + + VFMADD231SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.128.66.0F38.W0 B9 /r + + FMA + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, add to xmm1 and put result in xmm1. + + + VFMADD132SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 99 /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, add to xmm2 and put result in xmm1. + + + VFMADD213SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 A9 /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, add to xmm3/m32 and put result in xmm1. + + + VFMADD231SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 B9 /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm2 an.d xmm3/m32, add to xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADDSUB132PD/VFMADDSUB213PD/VFMADDSUB231PD--Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values. + + VFMADDSUB132PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 96 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, add/subtract elements in xmm2 and put result in xmm1. + + + VFMADDSUB213PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 A6 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/mem and put result in xmm1. + + + VFMADDSUB231PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 B6 /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, add/subtract elements in xmm1 and put result in xmm1. + + + VFMADDSUB132PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 96 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, add/subtract elements in ymm2 and put result in ymm1. 
+ + + VFMADDSUB213PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 A6 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/mem and put result in ymm1. + + + VFMADDSUB231PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 B6 /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, add/subtract elements in ymm1 and put result in ymm1. + + + VFMADDSUB213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 A6 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/m128/m64bcst and put result in xmm1 subject to writemask k1. + + + VFMADDSUB231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 B6 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, add/subtract elements in xmm1 and put result in xmm1 subject to writemask k1. + + + VFMADDSUB132PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 96 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, add/subtract elements in xmm2 and put result in xmm1 subject to writemask k1. + + + VFMADDSUB213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 A6 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/m256/m64bcst and put result in ymm1 subject to writemask k1. + + + VFMADDSUB231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 B6 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, add/subtract elements in ymm1 and put result in ymm1 subject to writemask k1. + + + VFMADDSUB132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 96 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, add/subtract elements in ymm2 and put result in ymm1 subject to writemask k1. + + + VFMADDSUB213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 A6 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1and zmm2, add/subtract elements in zmm3/m512/m64bcst and put result in zmm1 subject to writemask k1. + + + VFMADDSUB231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 B6 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, add/subtract elements in zmm1 and put result in zmm1 subject to writemask k1. + + + VFMADDSUB132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 96 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, add/subtract elements in zmm2 and put result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADDSUB132PS/VFMADDSUB213PS/VFMADDSUB231PS--Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values. + + VFMADDSUB132PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 96 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, add/subtract elements in xmm2 and put result in xmm1. 
+ + + VFMADDSUB213PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 A6 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/mem and put result in xmm1. + + + VFMADDSUB231PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 B6 /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, add/subtract elements in xmm1 and put result in xmm1. + + + VFMADDSUB132PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 96 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, add/subtract elements in ymm2 and put result in ymm1. + + + VFMADDSUB213PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 A6 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/mem and put result in ymm1. + + + VFMADDSUB231PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 B6 /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, add/subtract elements in ymm1 and put result in ymm1. + + + VFMADDSUB213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 A6 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/m128/m32bcst and put result in xmm1 subject to writemask k1. + + + VFMADDSUB231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 B6 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, add/subtract elements in xmm1 and put result in xmm1 subject to writemask k1. + + + VFMADDSUB132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 96 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, add/subtract elements in zmm2 and put result in xmm1 subject to writemask k1. + + + VFMADDSUB213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 A6 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/m256/m32bcst and put result in ymm1 subject to writemask k1. + + + VFMADDSUB231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 B6 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, add/subtract elements in ymm1 and put result in ymm1 subject to writemask k1. + + + VFMADDSUB132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 96 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, add/subtract elements in ymm2 and put result in ymm1 subject to writemask k1. + + + VFMADDSUB213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 A6 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, add/subtract elements in zmm3/m512/m32bcst and put result in zmm1 subject to writemask k1. + + + VFMADDSUB231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 B6 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, add/subtract elements in zmm1 and put result in zmm1 subject to writemask k1. 
+ + + VFMADDSUB132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 96 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, add/subtract elements in zmm2 and put result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUBADD132PD/VFMSUBADD213PD/VFMSUBADD231PD--Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values. + + VFMSUBADD132PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 97 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, subtract/add elements in xmm2 and put result in xmm1. + + + VFMSUBADD213PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 A7 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/mem and put result in xmm1. + + + VFMSUBADD231PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 B7 /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, subtract/add elements in xmm1 and put result in xmm1. + + + VFMSUBADD132PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 97 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, subtract/add elements in ymm2 and put result in ymm1. + + + VFMSUBADD213PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 A7 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/mem and put result in ymm1. + + + VFMSUBADD231PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 B7 /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, subtract/add elements in ymm1 and put result in ymm1. + + + VFMSUBADD132PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 97 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, subtract/add elements in xmm2 and put result in xmm1 subject to writemask k1. + + + VFMSUBADD213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 A7 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/m128/m64bcst and put result in xmm1 subject to writemask k1. + + + VFMSUBADD231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 B7 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, subtract/add elements in xmm1 and put result in xmm1 subject to writemask k1. + + + VFMSUBADD132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 97 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, subtract/add elements in ymm2 and put result in ymm1 subject to writemask k1. + + + VFMSUBADD213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 A7 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/m256/m64bcst and put result in ymm1 subject to writemask k1. + + + VFMSUBADD231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 B7 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, subtract/add elements in ymm1 and put result in ymm1 subject to writemask k1. 
+ + + VFMSUBADD132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 97 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, subtract/add elements in zmm2 and put result in zmm1 subject to writemask k1. + + + VFMSUBADD213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 A7 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm2, subtract/add elements in zmm3/m512/m64bcst and put result in zmm1 subject to writemask k1. + + + VFMSUBADD231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 B7 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, subtract/add elements in zmm1 and put result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUBADD132PS/VFMSUBADD213PS/VFMSUBADD231PS--Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values. + + VFMSUBADD132PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 97 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, subtract/add elements in xmm2 and put result in xmm1. + + + VFMSUBADD213PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 A7 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/mem and put result in xmm1. + + + VFMSUBADD231PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 B7 /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, subtract/add elements in xmm1 and put result in xmm1. + + + VFMSUBADD132PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 97 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, subtract/add elements in ymm2 and put result in ymm1. + + + VFMSUBADD213PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 A7 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/mem and put result in ymm1. + + + VFMSUBADD231PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 B7 /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, subtract/add elements in ymm1 and put result in ymm1. + + + VFMSUBADD132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 97 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, subtract/add elements in xmm2 and put result in xmm1 subject to writemask k1. + + + VFMSUBADD213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 A7 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/m128/m32bcst and put result in xmm1 subject to writemask k1. + + + VFMSUBADD231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 B7 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, subtract/add elements in xmm1 and put result in xmm1 subject to writemask k1. + + + VFMSUBADD132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 97 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, subtract/add elements in ymm2 and put result in ymm1 subject to writemask k1. 
+ + + VFMSUBADD213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 A7 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/m256/m32bcst and put result in ymm1 subject to writemask k1. + + + VFMSUBADD231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 B7 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, subtract/add elements in ymm1 and put result in ymm1 subject to writemask k1. + + + VFMSUBADD132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 97 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, subtract/add elements in zmm2 and put result in zmm1 subject to writemask k1. + + + VFMSUBADD213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 A7 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, subtract/add elements in zmm3/m512/m32bcst and put result in zmm1 subject to writemask k1. + + + VFMSUBADD231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 B7 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, subtract/add elements in zmm1 and put result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132PD/VFMSUB213PD/VFMSUB231PD--Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values. + + VFMSUB132PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 9A /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, subtract xmm2 and put result in xmm1. + + + VFMSUB213PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 AA /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract xmm3/mem and put result in xmm1. + + + VFMSUB231PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 BA /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, subtract xmm1 and put result in xmm1. + + + VFMSUB132PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 9A /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, subtract ymm2 and put result in ymm1. + + + VFMSUB213PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 AA /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract ymm3/mem and put result in ymm1. + + + VFMSUB231PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 BA /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, subtract ymm1 and put result in ymm1.S. + + + VFMSUB132PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 9A /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, subtract xmm2 and put result in xmm1 subject to writemask k1. + + + VFMSUB213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 AA /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract xmm3/m128/m64bcst and put result in xmm1 subject to writemask k1. 
+ + + VFMSUB231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 BA /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, subtract xmm1 and put result in xmm1 subject to writemask k1. + + + VFMSUB132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 9A /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, subtract ymm2 and put result in ymm1 subject to writemask k1. + + + VFMSUB213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 AA /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract ymm3/m256/m64bcst and put result in ymm1 subject to writemask k1. + + + VFMSUB231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 BA /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, subtract ymm1 and put result in ymm1 subject to writemask k1. + + + VFMSUB132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 9A /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, subtract zmm2 and put result in zmm1 subject to writemask k1. + + + VFMSUB213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 AA /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm2, subtract zmm3/m512/m64bcst and put result in zmm1 subject to writemask k1. + + + VFMSUB231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 BA /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, subtract zmm1 and put result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132PS/VFMSUB213PS/VFMSUB231PS--Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values. + + VFMSUB132PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 9A /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, subtract xmm2 and put result in xmm1. + + + VFMSUB213PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 AA /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract xmm3/mem and put result in xmm1. + + + VFMSUB231PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 BA /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, subtract xmm1 and put result in xmm1. + + + VFMSUB132PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 9A /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, subtract ymm2 and put result in ymm1. + + + VFMSUB213PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 AA /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract ymm3/mem and put result in ymm1. + + + VFMSUB231PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 BA /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, subtract ymm1 and put result in ymm1. + + + VFMSUB132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 9A /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, subtract xmm2 and put result in xmm1. 
+ + + VFMSUB213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 AA /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract xmm3/m128/m32bcst and put result in xmm1. + + + VFMSUB231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 BA /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, subtract xmm1 and put result in xmm1. + + + VFMSUB132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 9A /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, subtract ymm2 and put result in ymm1. + + + VFMSUB213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 AA /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract ymm3/m256/m32bcst and put result in ymm1. + + + VFMSUB231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 BA /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, subtract ymm1 and put result in ymm1. + + + VFMSUB132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 9A /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, subtract zmm2 and put result in zmm1. + + + VFMSUB213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 AA /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, subtract zmm3/m512/m32bcst and put result in zmm1. + + + VFMSUB231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 BA /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, subtract zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132SD/VFMSUB213SD/VFMSUB231SD--Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values. + + VFMSUB132SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 9B /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, subtract xmm2 and put result in xmm1. + + + VFMSUB213SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 AB /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, subtract xmm3/m64 and put result in xmm1. + + + VFMSUB231SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 BB /r + + FMA + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, subtract xmm1 and put result in xmm1. + + + VFMSUB132SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 9B /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, subtract xmm2 and put result in xmm1. + + + VFMSUB213SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 AB /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, subtract xmm3/m64 and put result in xmm1. + + + VFMSUB231SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 BB /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, subtract xmm1 and put result in xmm1. 
+ + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132SS/VFMSUB213SS/VFMSUB231SS--Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values. + + VFMSUB132SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.128.66.0F38.W0 9B /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, subtract xmm2 and put result in xmm1. + + + VFMSUB213SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.128.66.0F38.W0 AB /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, subtract xmm3/m32 and put result in xmm1. + + + VFMSUB231SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.128.66.0F38.W0 BB /r + + FMA + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, subtract xmm1 and put result in xmm1. + + + VFMSUB132SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 9B /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, subtract xmm2 and put result in xmm1. + + + VFMSUB213SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 AB /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, subtract xmm3/m32 and put result in xmm1. + + + VFMSUB231SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 BB /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, subtract xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132PD/VFNMADD213PD/VFNMADD231PD--Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values. + + VFNMADD132PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 9C /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 AC /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/mem and put result in xmm1. + + + VFNMADD231PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 BC /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 9C /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and add to ymm2 and put result in ymm1. + + + VFNMADD213PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 AC /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/mem and put result in ymm1. + + + VFNMADD231PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 BC /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and add to ymm1 and put result in ymm1. + + + VFNMADD132PD + xmm0 {k1}{z},xmm1,xmm2/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 9C /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, negate the multiplication result and add to xmm2 and put result in xmm1. 
+ + + VFNMADD213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 AC /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/m128/m64bcst and put result in xmm1. + + + VFNMADD231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 BC /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 9C /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, negate the multiplication result and add to ymm2 and put result in ymm1. + + + VFNMADD213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 AC /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/m256/m64bcst and put result in ymm1. + + + VFNMADD231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 BC /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, negate the multiplication result and add to ymm1 and put result in ymm1. + + + VFNMADD132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 9C /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, negate the multiplication result and add to zmm2 and put result in zmm1. + + + VFNMADD213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 AC /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm2, negate the multiplication result and add to zmm3/m512/m64bcst and put result in zmm1. + + + VFNMADD231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 BC /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, negate the multiplication result and add to zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132PS/VFNMADD213PS/VFNMADD231PS--Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values. + + VFNMADD132PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 9C /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 AC /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/mem and put result in xmm1. + + + VFNMADD231PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 BC /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 9C /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and add to ymm2 and put result in ymm1. 
+ + + VFNMADD213PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 AC /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/mem and put result in ymm1. + + + VFNMADD231PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 BC /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and add to ymm1 and put result in ymm1. + + + VFNMADD132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 9C /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 AC /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/m128/m32bcst and put result in xmm1. + + + VFNMADD231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 BC /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 9C /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, negate the multiplication result and add to ymm2 and put result in ymm1. + + + VFNMADD213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 AC /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/m256/m32bcst and put result in ymm1. + + + VFNMADD231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 BC /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, negate the multiplication result and add to ymm1 and put result in ymm1. + + + VFNMADD132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 9C /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, negate the multiplication result and add to zmm2 and put result in zmm1. + + + VFNMADD213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 AC /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, negate the multiplication result and add to zmm3/m512/m32bcst and put result in zmm1. + + + VFNMADD231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 BC /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, negate the multiplication result and add to zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132SD/VFNMADD213SD/VFNMADD231SD--Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values. + + VFNMADD132SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 9D /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/mem, negate the multiplication result and add to xmm2 and put result in xmm1. 
+ + + VFNMADD213SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 AD /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/mem and put result in xmm1. + + + VFNMADD231SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 BD /r + + FMA + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/mem, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 9D /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 AD /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/m64 and put result in xmm1. + + + VFNMADD231SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 BD /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, negate the multiplication result and add to xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132SS/VFNMADD213SS/VFNMADD231SS--Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values. + + VFNMADD132SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.128.66.0F38.W0 9D /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.128.66.0F38.W0 AD /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/m32 and put result in xmm1. + + + VFNMADD231SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.128.66.0F38.W0 BD /r + + FMA + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 9D /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 AD /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/m32 and put result in xmm1. + + + VFNMADD231SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 BD /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and add to xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132PD/VFNMSUB213PD/VFNMSUB231PD--Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values. + + VFNMSUB132PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 9E /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and subtract xmm2 and put result in xmm1. 
+ + + VFNMSUB213PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 AE /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/mem and put result in xmm1. + + + VFNMSUB231PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 BE /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + VFNMSUB132PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 9E /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and subtract ymm2 and put result in ymm1. + + + VFNMSUB213PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 AE /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/mem and put result in ymm1. + + + VFNMSUB231PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 BE /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and subtract ymm1 and put result in ymm1. + + + VFNMSUB132PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 9E /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 AE /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m128/m64bcst and put result in xmm1. + + + VFNMSUB231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 BE /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + VFNMSUB132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 9E /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, negate the multiplication result and subtract ymm2 and put result in ymm1. + + + VFNMSUB213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 AE /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/m256/m64bcst and put result in ymm1. + + + VFNMSUB231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 BE /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, negate the multiplication result and subtract ymm1 and put result in ymm1. + + + VFNMSUB132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 9E /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, negate the multiplication result and subtract zmm2 and put result in zmm1. + + + VFNMSUB213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 AE /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm2, negate the multiplication result and subtract zmm3/m512/m64bcst and put result in zmm1. 
+ + + VFNMSUB231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 BE /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, negate the multiplication result and subtract zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132PS/VFNMSUB213PS/VFNMSUB231PS--Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values. + + VFNMSUB132PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 9E /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 AE /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/mem and put result in xmm1. + + + VFNMSUB231PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 BE /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + VFNMSUB132PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 9E /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and subtract ymm2 and put result in ymm1. + + + VFNMSUB213PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 AE /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/mem and put result in ymm1. + + + VFNMSUB231PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 BE /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and subtract ymm1 and put result in ymm1. + + + VFNMSUB132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 9E /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 AE /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m128/m32bcst and put result in xmm1. + + + VFNMSUB231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 BE /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, negate the multiplication result subtract add to xmm1 and put result in xmm1. + + + VFNMSUB132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 9E /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, negate the multiplication result and subtract ymm2 and put result in ymm1. + + + VFNMSUB213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 AE /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/m256/m32bcst and put result in ymm1. 
+ + + VFNMSUB231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 BE /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, negate the multiplication result subtract add to ymm1 and put result in ymm1. + + + VFNMSUB132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 9E /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, negate the multiplication result and subtract zmm2 and put result in zmm1. + + + VFNMSUB213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 AE /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, negate the multiplication result and subtract zmm3/m512/m32bcst and put result in zmm1. + + + VFNMSUB231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 BE /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, negate the multiplication result subtract add to zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132SD/VFNMSUB213SD/VFNMSUB231SD--Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values. + + VFNMSUB132SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 9F /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/mem, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 AF /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/mem and put result in xmm1. + + + VFNMSUB231SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.128.66.0F38.W1 BF /r + + FMA + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/mem, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + VFNMSUB132SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 9F /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 AF /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m64 and put result in xmm1. + + + VFNMSUB231SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 BF /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132SS/VFNMSUB213SS/VFNMSUB231SS--Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values. + + VFNMSUB132SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.128.66.0F38.W0 9F /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.128.66.0F38.W0 AF /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m32 and put result in xmm1. 
+ + + VFNMSUB231SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.128.66.0F38.W0 BF /r + + FMA + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + VFNMSUB132SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 9F /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 AF /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m32 and put result in xmm1. + + + VFNMSUB231SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 BF /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFPCLASSPD--Tests Types Of a Packed Float64 Values. + + VFPCLASSPD + k2 {k1},xmm2/m128/m64bcst,imm8 + EVEX.128.66.0F3A.W1 66 /r ib + + AVX512VL + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + VFPCLASSPD + k2 {k1},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 66 /r ib + + AVX512VL + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + VFPCLASSPD + k2 {k1},zmm2/m512/m64bcst,imm8 + EVEX.512.66.0F3A.W1 66 /r ib + + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VFPCLASSPS--Tests Types Of a Packed Float32 Values. + + VFPCLASSPS + k2 {k1},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F3A.W0 66 /r ib + + AVX512VL + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + VFPCLASSPS + k2 {k1},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F3A.W0 66 /r ib + + AVX512VL + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + VFPCLASSPS + k2 {k1},zmm2/m512/m32bcst,imm8 + EVEX.512.66.0F3A.W0 66 /r ib + + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VFPCLASSSD--Tests Types Of a Scalar Float64 Values. 
+ + VFPCLASSSD + k2 {k1},xmm2/m64,imm8 + EVEX.LIG.66.0F3A.W1 67 /r ib + + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VFPCLASSSS--Tests Types Of a Scalar Float32 Values. + + VFPCLASSSS + k2 {k1},xmm2/m32,imm8 + EVEX.LIG.66.0F3A.W0 67 /r + + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPGATHERDD/VPGATHERDQ--Gather Packed Dword, Packed Qword with Signed Dword Indices. + + VPGATHERDD + xmm1 {k1},vm32x + EVEX.128.66.0F38.W0 90 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERDD + ymm1 {k1},vm32y + EVEX.256.66.0F38.W0 90 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERDD + zmm1 {k1},vm32z + EVEX.512.66.0F38.W0 90 /vsib + + AVX512F + + Using signed dword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERDQ + xmm1 {k1},vm32x + EVEX.128.66.0F38.W1 90 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + VPGATHERDQ + ymm1 {k1},vm32x + EVEX.256.66.0F38.W1 90 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + VPGATHERDQ + zmm1 {k1},vm32y + EVEX.512.66.0F38.W1 90 /vsib + + AVX512F + + Using signed dword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + ModRM:reg(w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + + + + VPGATHERQD/VPGATHERQQ--Gather Packed Dword, Packed Qword with Signed Qword Indices. + + VPGATHERQD + xmm1 {k1},vm64x + EVEX.128.66.0F38.W0 91 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERQD + xmm1 {k1},vm64y + EVEX.256.66.0F38.W0 91 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERQD + ymm1 {k1},vm64z + EVEX.512.66.0F38.W0 91 /vsib + + AVX512F + + Using signed qword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERQQ + xmm1 {k1},vm64x + EVEX.128.66.0F38.W1 91 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + VPGATHERQQ + ymm1 {k1},vm64y + EVEX.256.66.0F38.W1 91 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + VPGATHERQQ + zmm1 {k1},vm64z + EVEX.512.66.0F38.W1 91 /vsib + + AVX512F + + Using signed qword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + ModRM:reg(w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + + + + VGATHERDPS/VGATHERDPD--Gather Packed Single, Packed Double with Signed Dword. 
+ + VGATHERDPS + xmm1 {k1},vm32x + EVEX.128.66.0F38.W0 92 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather single-precision floatingpoint values from memory using k1 as completion mask. + + + VGATHERDPS + ymm1 {k1},vm32y + EVEX.256.66.0F38.W0 92 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather single-precision floatingpoint values from memory using k1 as completion mask. + + + VGATHERDPS + zmm1 {k1},vm32z + EVEX.512.66.0F38.W0 92 /vsib + + AVX512F + + Using signed dword indices, gather single-precision floatingpoint values from memory using k1 as completion mask. + + + VGATHERDPD + xmm1 {k1},vm32x + EVEX.128.66.0F38.W1 92 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather float64 vector into float64 vector xmm1 using k1 as completion mask. + + + VGATHERDPD + ymm1 {k1},vm32x + EVEX.256.66.0F38.W1 92 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather float64 vector into float64 vector ymm1 using k1 as completion mask. + + + VGATHERDPD + zmm1 {k1},vm32y + EVEX.512.66.0F38.W1 92 /vsib + + AVX512F + + Using signed dword indices, gather float64 vector into float64 vector zmm1 using k1 as completion mask. + + + ModRM:reg(w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + + + + VGATHERQPS/VGATHERQPD--Gather Packed Single, Packed Double with Signed Qword Indices. + + VGATHERQPS + xmm1 {k1},vm64x + EVEX.128.66.0F38.W0 93 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather single-precision floating-point values from memory using k1 as completion mask. + + + VGATHERQPS + xmm1 {k1},vm64y + EVEX.256.66.0F38.W0 93 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather single-precision floating-point values from memory using k1 as completion mask. + + + VGATHERQPS + ymm1 {k1},vm64z + EVEX.512.66.0F38.W0 93 /vsib + + AVX512F + + Using signed qword indices, gather single-precision floating-point values from memory using k1 as completion mask. + + + VGATHERQPD + xmm1 {k1},vm64x + EVEX.128.66.0F38.W1 93 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather float64 vector into float64 vector xmm1 using k1 as completion mask. + + + VGATHERQPD + ymm1 {k1},vm64y + EVEX.256.66.0F38.W1 93 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather float64 vector into float64 vector ymm1 using k1 as completion mask. + + + VGATHERQPD + zmm1 {k1},vm64z + EVEX.512.66.0F38.W1 93 /vsib + + AVX512F + + Using signed qword indices, gather float64 vector into float64 vector zmm1 using k1 as completion mask. + + + ModRM:reg(w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + + + + VGETEXPPD--Convert Exponents of Packed DP FP Values to DP FP Values. + + VGETEXPPD + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 42 /r + + AVX512VL + AVX512F + + Convert the exponent of packed double-precision floating-point values in the source operand to DP FP results representing unbiased integer exponents and stores the results in the destination register. + + + VGETEXPPD + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 42 /r + + AVX512VL + AVX512F + + Convert the exponent of packed double-precision floating-point values in the source operand to DP FP results representing unbiased integer exponents and stores the results in the destination register. 
+ + + VGETEXPPD + zmm1 {k1}{z},zmm2/m512/m64bcst{sae} + EVEX.512.66.0F38.W1 42 /r + + AVX512F + + Convert the exponent of packed double-precision floating-point values in the source operand to DP FP results representing unbiased integer exponents and stores the results in the destination under writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VGETEXPPS--Convert Exponents of Packed SP FP Values to SP FP Values. + + VGETEXPPS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 42 /r + + AVX512VL + AVX512F + + Convert the exponent of packed single-precision floating-point values in the source operand to SP FP results representing unbiased integer exponents and stores the results in the destination register. + + + VGETEXPPS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 42 /r + + AVX512VL + AVX512F + + Convert the exponent of packed single-precision floating-point values in the source operand to SP FP results representing unbiased integer exponents and stores the results in the destination register. + + + VGETEXPPS + zmm1 {k1}{z},zmm2/m512/m32bcst{sae} + EVEX.512.66.0F38.W0 42 /r + + AVX512F + + Convert the exponent of packed single-precision floating-point values in the source operand to SP FP results representing unbiased integer exponents and stores the results in the destination register. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VGETEXPSD--Convert Exponents of Scalar DP FP Values to DP FP Value. + + VGETEXPSD + xmm1 {k1}{z},xmm2,xmm3/m64{sae} + EVEX.NDS.LIG.66.0F38.W1 43 /r + + AVX512F + + Convert the biased exponent (bits 62:52) of the low doubleprecision floating-point value in xmm3/m64 to a DP FP value representing unbiased integer exponent. Stores the result to the low 64-bit of xmm1 under the writemask k1 and merge with the other elements of xmm2. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VGETEXPSS--Convert Exponents of Scalar SP FP Values to SP FP Value. + + VGETEXPSS + xmm1 {k1}{z},xmm2,xmm3/m32{sae} + EVEX.NDS.LIG.66.0F38.W0 43 /r + + AVX512F + + Convert the biased exponent (bits 30:23) of the low singleprecision floating-point value in xmm3/m32 to a SP FP value representing unbiased integer exponent. Stores the result to xmm1 under the writemask k1 and merge with the other elements of xmm2. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VGETMANTPD--Extract Float64 Vector of Normalized Mantissas from Float64 Vector. + + VGETMANTPD + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.128.66.0F3A.W1 26 /r ib + + AVX512VL + AVX512F + + Get Normalized Mantissa from float64 vector xmm2/m128/m64bcst and store the result in xmm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + VGETMANTPD + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 26 /r ib + + AVX512VL + AVX512F + + Get Normalized Mantissa from float64 vector ymm2/m256/m64bcst and store the result in ymm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + VGETMANTPD + zmm1 {k1}{z},zmm2/m512/m64bcst{sae},imm8 + EVEX.512.66.0F3A.W1 26 /r ib + + AVX512F + + Get Normalized Mantissa from float64 vector zmm2/m512/m64bcst and store the result in zmm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VGETMANTPS--Extract Float32 Vector of Normalized Mantissas from Float32 Vector. 
+ + VGETMANTPS + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F3A.W0 26 /r ib + + AVX512VL + AVX512F + + Get normalized mantissa from float32 vector xmm2/m128/m32bcst and store the result in xmm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + VGETMANTPS + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F3A.W0 26 /r ib + + AVX512VL + AVX512F + + Get normalized mantissa from float32 vector ymm2/m256/m32bcst and store the result in ymm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + VGETMANTPS + zmm1 {k1}{z},zmm2/m512/m32bcst{sae},imm8 + EVEX.512.66.0F3A.W0 26 /r ib + + AVX512F + + Get normalized mantissa from float32 vector zmm2/m512/m32bcst and store the result in zmm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VGETMANTSD--Extract Float64 of Normalized Mantissas from Float64 Scalar. + + VGETMANTSD + xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W1 27 /r ib + + AVX512F + + Extract the normalized mantissa of the low float64 element in xmm3/m64 using imm8 for sign control and mantissa interval normalization. Store the mantissa to xmm1 under the writemask k1 and merge with the other elements of xmm2. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VGETMANTSS--Extract Float32 Vector of Normalized Mantissa from Float32 Vector. + + VGETMANTSS + xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W0 27 /r ib + + AVX512F + + Extract the normalized mantissa from the low float32 element of xmm3/m32 using imm8 for sign control and mantissa interval normalization, store the mantissa to xmm1 under the writemask k1 and merge with the other elements of xmm2. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VINSERTF128/VINSERTF32x4/VINSERTF64x2/VINSERTF32x8/VINSERTF64x4--Insert Packed Floating-Point Values. + + VINSERTF128 + ymm1,ymm2,xmm3/m128,imm8 + VEX.NDS.256.66.0F3A.W0 18 /r ib + + AVX + + Insert 128 bits of packed floating-point values from xmm3/m128 and the remaining values from ymm2 into ymm1. + + + VINSERTF32X4 + ymm1 {k1}{z},ymm2,xmm3/m128,imm8 + EVEX.NDS.256.66.0F3A.W0 18 /r ib + + AVX512VL + AVX512F + + Insert 128 bits of packed single-precision floatingpoint values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1. + + + VINSERTF32X4 + zmm1 {k1}{z},zmm2,xmm3/m128,imm8 + EVEX.NDS.512.66.0F3A.W0 18 /r ib + + AVX512F + + Insert 128 bits of packed single-precision floatingpoint values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1. + + + VINSERTF64X2 + ymm1 {k1}{z},ymm2,xmm3/m128,imm8 + EVEX.NDS.256.66.0F3A.W1 18 /r ib + + AVX512VL + AVX512DQ + + Insert 128 bits of packed double-precision floatingpoint values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1. + + + VINSERTF64X2 + zmm1 {k1}{z},zmm2,xmm3/m128,imm8 + EVEX.NDS.512.66.0F3A.W1 18 /r ib + + AVX512DQ + + Insert 128 bits of packed double-precision floatingpoint values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1. + + + VINSERTF32X8 + zmm1 {k1}{z},zmm2,ymm3/m256,imm8 + EVEX.NDS.512.66.0F3A.W0 1A /r ib + + AVX512DQ + + Insert 256 bits of packed single-precision floatingpoint values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1. 
+ + + VINSERTF64X4 + zmm1 {k1}{z},zmm2,ymm3/m256,imm8 + EVEX.NDS.512.66.0F3A.W1 1A /r ib + + AVX512F + + Insert 256 bits of packed double-precision floatingpoint values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VINSERTI128/VINSERTI32x4/VINSERTI64x2/VINSERTI32x8/VINSERTI64x4--Insert Packed Integer Values. + + VINSERTI128 + ymm1,ymm2,xmm3/m128,imm8 + VEX.NDS.256.66.0F3A.W0 38 /r ib + + AVX2 + + Insert 128 bits of integer data from xmm3/m128 and the remaining values from ymm2 into ymm1. + + + VINSERTI32X4 + ymm1 {k1}{z},ymm2,xmm3/m128,imm8 + EVEX.NDS.256.66.0F3A.W0 38 /r ib + + AVX512VL + AVX512F + + Insert 128 bits of packed doubleword integer values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1. + + + VINSERTI32X4 + zmm1 {k1}{z},zmm2,xmm3/m128,imm8 + EVEX.NDS.512.66.0F3A.W0 38 /r ib + + AVX512F + + Insert 128 bits of packed doubleword integer values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1. + + + VINSERTI64X2 + ymm1 {k1}{z},ymm2,xmm3/m128,imm8 + EVEX.NDS.256.66.0F3A.W1 38 /r ib + + AVX512VL + AVX512DQ + + Insert 128 bits of packed quadword integer values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1. + + + VINSERTI64X2 + zmm1 {k1}{z},zmm2,xmm3/m128,imm8 + EVEX.NDS.512.66.0F3A.W1 38 /r ib + + AVX512DQ + + Insert 128 bits of packed quadword integer values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1. + + + VINSERTI32X8 + zmm1 {k1}{z},zmm2,ymm3/m256,imm8 + EVEX.NDS.512.66.0F3A.W0 3A /r ib + + AVX512DQ + + Insert 256 bits of packed doubleword integer values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1. + + + VINSERTI64X4 + zmm1 {k1}{z},zmm2,ymm3/m256,imm8 + EVEX.NDS.512.66.0F3A.W1 3A /r ib + + AVX512F + + Insert 256 bits of packed quadword integer values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + INSERTPS--Insert Scalar Single-Precision Floating-Point Value. + + INSERTPS + xmm1,xmm2/m32,imm8 + 66 0F 3A 21 /r ib + + SSE4_1 + + Insert a single-precision floating-point value selected by imm8 from xmm2/m32 into xmm1 at the specified destination element specified by imm8 and zero out destination elements in xmm1 as indicated in imm8. + + + VINSERTPS + xmm1,xmm2,xmm3/m32,imm8 + VEX.NDS.128.66.0F3A.WIG 21 /r ib + + AVX + + Insert a single-precision floating-point value selected by imm8 from xmm3/m32 and merge with values in xmm2 at the specified destination element specified by imm8 and write out the result and zero out destination elements in xmm1 as indicated in imm8. + + + VINSERTPS + xmm1,xmm2,xmm3/m32,imm8 + EVEX.NDS.128.66.0F3A.W0 21 /r ib + + AVX512F + + Insert a single-precision floating-point value selected by imm8 from xmm3/m32 and merge with values in xmm2 at the specified destination element specified by imm8 and write out the result and zero out destination elements in xmm1 as indicated in imm8. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + MAXPD--Maximum of Packed Double-Precision Floating-Point Values. + + MAXPD + xmm1,xmm2/m128 + 66 0F 5F /r + + SSE2 + + Return the maximum double-precision floating-point values between xmm1 and xmm2/m128. + + + VMAXPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5F /r + + AVX + + Return the maximum double-precision floating-point values between xmm2 and xmm3/m128. + + + VMAXPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5F /r + + AVX + + Return the maximum packed double-precision floating-point values between ymm2 and ymm3/m256. + + + VMAXPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 5F /r + + AVX512VL + AVX512F + + Return the maximum packed double-precision floating-point values between xmm2 and xmm3/m128/m64bcst and store result in xmm1 subject to writemask k1. + + + VMAXPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 5F /r + + AVX512VL + AVX512F + + Return the maximum packed double-precision floating-point values between ymm2 and ymm3/m256/m64bcst and store result in ymm1 subject to writemask k1. + + + VMAXPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae} + EVEX.NDS.512.66.0F.W1 5F /r + + AVX512F + + Return the maximum packed double-precision floating-point values between zmm2 and zmm3/m512/m64bcst and store result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MAXPS--Maximum of Packed Single-Precision Floating-Point Values. + + MAXPS + xmm1,xmm2/m128 + 0F 5F /r + + SSE + + Return the maximum single-precision floating-point values between xmm1 and xmm2/mem. + + + VMAXPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5F /r + + AVX + + Return the maximum single-precision floating-point values between xmm2 and xmm3/mem. + + + VMAXPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5F /r + + AVX + + Return the maximum single-precision floating-point values between ymm2 and ymm3/mem. + + + VMAXPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 5F /r + + AVX512VL + AVX512F + + Return the maximum packed single-precision floating-point values between xmm2 and xmm3/m128/m32bcst and store result in xmm1 subject to writemask k1. + + + VMAXPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 5F /r + + AVX512VL + AVX512F + + Return the maximum packed single-precision floating-point values between ymm2 and ymm3/m256/m32bcst and store result in ymm1 subject to writemask k1. + + + VMAXPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae} + EVEX.NDS.512.0F.W0 5F /r + + AVX512F + + Return the maximum packed single-precision floating-point values between zmm2 and zmm3/m512/m32bcst and store result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MAXSD--Return Maximum Scalar Double-Precision Floating-Point Value. + + MAXSD + xmm1,xmm2/m64 + F2 0F 5F /r + + SSE2 + + Return the maximum scalar double-precision floating-point value between xmm2/m64 and xmm1. + + + VMAXSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 5F /r + + AVX + + Return the maximum scalar double-precision floating-point value between xmm3/m64 and xmm2. 
+ + + VMAXSD + xmm1 {k1}{z},xmm2,xmm3/m64{sae} + EVEX.NDS.LIG.F2.0F.W1 5F /r + + AVX512F + + Return the maximum scalar double-precision floating-point value between xmm3/m64 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MAXSS--Return Maximum Scalar Single-Precision Floating-Point Value. + + MAXSS + xmm1,xmm2/m32 + F3 0F 5F /r + + SSE + + Return the maximum scalar single-precision floating-point value between xmm2/m32 and xmm1. + + + VMAXSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 5F /r + + AVX + + Return the maximum scalar single-precision floating-point value between xmm3/m32 and xmm2. + + + VMAXSS + xmm1 {k1}{z},xmm2,xmm3/m32{sae} + EVEX.NDS.LIG.F3.0F.W0 5F /r + + AVX512F + + Return the maximum scalar single-precision floating-point value between xmm3/m32 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MINPD--Minimum of Packed Double-Precision Floating-Point Values. + + MINPD + xmm1,xmm2/m128 + 66 0F 5D /r + + SSE2 + + Return the minimum double-precision floating-point values between xmm1 and xmm2/mem. + + + VMINPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5D /r + + AVX + + Return the minimum double-precision floating-point values between xmm2 and xmm3/mem. + + + VMINPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5D /r + + AVX + + Return the minimum packed double-precision floating-point values between ymm2 and ymm3/mem. + + + VMINPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 5D /r + + AVX512VL + AVX512F + + Return the minimum packed double-precision floating-point values between xmm2 and xmm3/m128/m64bcst and store result in xmm1 subject to writemask k1. + + + VMINPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 5D /r + + AVX512VL + AVX512F + + Return the minimum packed double-precision floating-point values between ymm2 and ymm3/m256/m64bcst and store result in ymm1 subject to writemask k1. + + + VMINPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae} + EVEX.NDS.512.66.0F.W1 5D /r + + AVX512F + + Return the minimum packed double-precision floating-point values between zmm2 and zmm3/m512/m64bcst and store result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MINPS--Minimum of Packed Single-Precision Floating-Point Values. + + MINPS + xmm1,xmm2/m128 + 0F 5D /r + + SSE + + Return the minimum single-precision floating-point values between xmm1 and xmm2/mem. + + + VMINPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5D /r + + AVX + + Return the minimum single-precision floating-point values between xmm2 and xmm3/mem. + + + VMINPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5D /r + + AVX + + Return the minimum single double-precision floating-point values between ymm2 and ymm3/mem. + + + VMINPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 5D /r + + AVX512VL + AVX512F + + Return the minimum packed single-precision floating-point values between xmm2 and xmm3/m128/m32bcst and store result in xmm1 subject to writemask k1. + + + VMINPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 5D /r + + AVX512VL + AVX512F + + Return the minimum packed single-precision floating-point values between ymm2 and ymm3/m256/m32bcst and store result in ymm1 subject to writemask k1. 
+ + + VMINPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae} + EVEX.NDS.512.0F.W0 5D /r + + AVX512F + + Return the minimum packed single-precision floating-point values between zmm2 and zmm3/m512/m32bcst and store result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MINSD--Return Minimum Scalar Double-Precision Floating-Point Value. + + MINSD + xmm1,xmm2/m64 + F2 0F 5D /r + + SSE2 + + Return the minimum scalar double-precision floatingpoint value between xmm2/m64 and xmm1. + + + VMINSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 5D /r + + AVX + + Return the minimum scalar double-precision floatingpoint value between xmm3/m64 and xmm2. + + + VMINSD + xmm1 {k1}{z},xmm2,xmm3/m64{sae} + EVEX.NDS.LIG.F2.0F.W1 5D /r + + AVX512F + + Return the minimum scalar double-precision floatingpoint value between xmm3/m64 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MINSS--Return Minimum Scalar Single-Precision Floating-Point Value. + + MINSS + xmm1,xmm2/m32 + F3 0F 5D /r + + SSE + + Return the minimum scalar single-precision floatingpoint value between xmm2/m32 and xmm1. + + + VMINSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 5D /r + + AVX + + Return the minimum scalar single-precision floatingpoint value between xmm3/m32 and xmm2. + + + VMINSS + xmm1 {k1}{z},xmm2,xmm3/m32{sae} + EVEX.NDS.LIG.F3.0F.W0 5D /r + + AVX512F + + Return the minimum scalar single-precision floatingpoint value between xmm3/m32 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MOVAPD--Move Aligned Packed Double-Precision Floating-Point Values. + + MOVAPD + xmm1,xmm2/m128 + 66 0F 28 /r + + SSE2 + + Move aligned packed double-precision floatingpoint values from xmm2/mem to xmm1. + + + MOVAPD + xmm2/m128,xmm1 + 66 0F 29 /r + + SSE2 + + Move aligned packed double-precision floatingpoint values from xmm1 to xmm2/mem. + + + VMOVAPD + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 28 /r + + AVX + + Move aligned packed double-precision floatingpoint values from xmm2/mem to xmm1. + + + VMOVAPD + xmm2/m128,xmm1 + VEX.128.66.0F.WIG 29 /r + + AVX + + Move aligned packed double-precision floatingpoint values from xmm1 to xmm2/mem. + + + VMOVAPD + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 28 /r + + AVX + + Move aligned packed double-precision floatingpoint values from ymm2/mem to ymm1. + + + VMOVAPD + ymm2/m256,ymm1 + VEX.256.66.0F.WIG 29 /r + + AVX + + Move aligned packed double-precision floatingpoint values from ymm1 to ymm2/mem. + + + VMOVAPD + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F.W1 28 /r + + AVX512VL + AVX512F + + Move aligned packed double-precision floatingpoint values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVAPD + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F.W1 28 /r + + AVX512VL + AVX512F + + Move aligned packed double-precision floatingpoint values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVAPD + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F.W1 28 /r + + AVX512F + + Move aligned packed double-precision floatingpoint values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVAPD + xmm2/m128 {k1}{z},xmm1 + EVEX.128.66.0F.W1 29 /r + + AVX512VL + AVX512F + + Move aligned packed double-precision floatingpoint values from xmm1 to xmm2/m128 using writemask k1. 
+ + + VMOVAPD + ymm2/m256 {k1}{z},ymm1 + EVEX.256.66.0F.W1 29 /r + + AVX512VL + AVX512F + + Move aligned packed double-precision floatingpoint values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVAPD + zmm2/m512 {k1}{z},zmm1 + EVEX.512.66.0F.W1 29 /r + + AVX512F + + Move aligned packed double-precision floatingpoint values from zmm1 to zmm2/m512 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVAPS--Move Aligned Packed Single-Precision Floating-Point Values. + + MOVAPS + xmm1,xmm2/m128 + 0F 28 /r + + SSE + + Move aligned packed single-precision floating-point values from xmm2/mem to xmm1. + + + MOVAPS + xmm2/m128,xmm1 + 0F 29 /r + + SSE + + Move aligned packed single-precision floating-point values from xmm1 to xmm2/mem. + + + VMOVAPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 28 /r + + AVX + + Move aligned packed single-precision floating-point values from xmm2/mem to xmm1. + + + VMOVAPS + xmm2/m128,xmm1 + VEX.128.0F.WIG 29 /r + + AVX + + Move aligned packed single-precision floating-point values from xmm1 to xmm2/mem. + + + VMOVAPS + ymm1,ymm2/m256 + VEX.256.0F.WIG 28 /r + + AVX + + Move aligned packed single-precision floating-point values from ymm2/mem to ymm1. + + + VMOVAPS + ymm2/m256,ymm1 + VEX.256.0F.WIG 29 /r + + AVX + + Move aligned packed single-precision floating-point values from ymm1 to ymm2/mem. + + + VMOVAPS + xmm1 {k1}{z},xmm2/m128 + EVEX.128.0F.W0 28 /r + + AVX512VL + AVX512F + + Move aligned packed single-precision floating-point values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVAPS + ymm1 {k1}{z},ymm2/m256 + EVEX.256.0F.W0 28 /r + + AVX512VL + AVX512F + + Move aligned packed single-precision floating-point values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVAPS + zmm1 {k1}{z},zmm2/m512 + EVEX.512.0F.W0 28 /r + + AVX512F + + Move aligned packed single-precision floating-point values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVAPS + xmm2/m128 {k1}{z},xmm1 + EVEX.128.0F.W0 29 /r + + AVX512VL + AVX512F + + Move aligned packed single-precision floating-point values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVAPS + ymm2/m256 {k 1}{z},ymm1 + EVEX.256.0F.W0 29 /r + + AVX512VL + AVX512F + + Move aligned packed single-precision floating-point values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVAPS + zmm2/m512 {k1}{z},zmm1 + EVEX.512.0F.W0 29 /r + + AVX512F + + Move aligned packed single-precision floating-point values from zmm1 to zmm2/m512 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVD/MOVQ--Move Doubleword and Quadword. + + MOVD + xmm1,r32/m32 + 66 0F 6E /r + + SSE2 + + Move doubleword from r/m32 to xmm1. + + + MOVQ + xmm1,r64/m64 + 66 REX.W 0F 6E /r + + SSE2 + + Move quadword from r/m64 to xmm1. + + + VMOVD + xmm1,r32/m32 + VEX.128.66.0F.W0 6E /r + + AVX + + Move doubleword from r/m32 to xmm1. + + + VMOVQ + xmm1,r64/m64 + VEX.128.66.0F.W1 6E /r + + AVX + + Move quadword from r/m64 to xmm1. + + + VMOVD + xmm1,r32/m32 + EVEX.128.66.0F.W0 6E /r + + AVX512F + + Move doubleword from r/m32 to xmm1. + + + VMOVQ + xmm1,r64/m64 + EVEX.128.66.0F.W1 6E /r + + AVX512F + + Move quadword from r/m64 to xmm1. + + + MOVD + r32/m32,xmm1 + 66 0F 7E /r + + SSE2 + + Move doubleword from xmm1 register to r/m32. 
+ + + MOVQ + r64/m64,xmm1 + 66 REX.W 0F 7E /r + + SSE2 + + Move quadword from xmm1 register to r/m64. + + + VMOVD + r32/m32,xmm1 + VEX.128.66.0F.W0 7E /r + + AVX + + Move doubleword from xmm1 register to r/m32. + + + VMOVQ + r64/m64,xmm1 + VEX.128.66.0F.W1 7E /r + + AVX + + Move quadword from xmm1 register to r/m64. + + + VMOVD + r32/m32,xmm1 + EVEX.128.66.0F.W0 7E /r + + AVX512F + + Move doubleword from xmm1 register to r/m32. + + + VMOVQ + r64/m64,xmm1 + EVEX.128.66.0F.W1 7E /r + + AVX512F + + Move quadword from xmm1 register to r/m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVQ--Move Quadword. + + MOVQ + xmm1,xmm2/m64 + F3 0F 7E /r + + SSE2 + + Move quadword from xmm2/m64 to xmm1. + + + VMOVQ + xmm1,xmm2/m64 + VEX.128.F3.0F.WIG 7E /r + + AVX + + Move quadword from xmm2/m64 to xmm1. + + + VMOVQ + xmm1,xmm2/m64 + EVEX.128.F3.0F.W1 7E /r + + AVX512F + + Move quadword from xmm2/m64 to xmm1. + + + MOVQ + xmm1/m64,xmm2 + 66 0F D6 /r + + SSE2 + + Move quadword from xmm2 register to xmm1/m64. + + + VMOVQ + xmm1/m64,xmm2 + VEX.128.66.0F D6.WIG /r + + AVX + + Move quadword from xmm2 register to xmm1/m64. + + + VMOVQ + xmm1/m64,xmm2 + EVEX.128.66.0F.W1 D6 /r + + AVX512F + + Move quadword from xmm2 register to xmm1/m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVDDUP--Replicate Double FP Values. + + MOVDDUP + xmm1,xmm2/m64 + F2 0F 12 /r + + SSE3 + + Move double-precision floating-point value from xmm2/m64 and duplicate into xmm1. + + + VMOVDDUP + xmm1,xmm2/m64 + VEX.128.F2.0F.WIG 12 /r + + AVX + + Move double-precision floating-point value from xmm2/m64 and duplicate into xmm1. + + + VMOVDDUP + ymm1,ymm2/m256 + VEX.256.F2.0F.WIG 12 /r + + AVX + + Move even index double-precision floating-point values from ymm2/mem and duplicate each element into ymm1. + + + VMOVDDUP + xmm1 {k1}{z},xmm2/m64 + EVEX.128.F2.0F.W1 12 /r + + AVX512VL + AVX512F + + Move double-precision floating-point value from xmm2/m64 and duplicate each element into xmm1 subject to writemask k1. + + + VMOVDDUP + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F2.0F.W1 12 /r + + AVX512VL + AVX512F + + Move even index double-precision floating-point values from ymm2/m256 and duplicate each element into ymm1 subject to writemask k1. + + + VMOVDDUP + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F2.0F.W1 12 /r + + AVX512F + + Move even index double-precision floating-point values from zmm2/m512 and duplicate each element into zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVDQA,VMOVDQA32/64--Move Aligned Packed Integer Values. + + MOVDQA + xmm1,xmm2/m128 + 66 0F 6F /r + + SSE2 + + Move aligned packed integer values from xmm2/mem to xmm1. + + + MOVDQA + xmm2/m128,xmm1 + 66 0F 7F /r + + SSE2 + + Move aligned packed integer values from xmm1 to xmm2/mem. + + + VMOVDQA + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 6F /r + + AVX + + Move aligned packed integer values from xmm2/mem to xmm1. + + + VMOVDQA + xmm2/m128,xmm1 + VEX.128.66.0F.WIG 7F /r + + AVX + + Move aligned packed integer values from xmm1 to xmm2/mem. + + + VMOVDQA + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 6F /r + + AVX + + Move aligned packed integer values from ymm2/mem to ymm1. 
+ + + VMOVDQA + ymm2/m256,ymm1 + VEX.256.66.0F.WIG 7F /r + + AVX + + Move aligned packed integer values from ymm1 to ymm2/mem. + + + VMOVDQA32 + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F.W0 6F /r + + AVX512VL + AVX512F + + Move aligned packed doubleword integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQA32 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F.W0 6F /r + + AVX512VL + AVX512F + + Move aligned packed doubleword integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQA32 + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F.W0 6F /r + + AVX512F + + Move aligned packed doubleword integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQA32 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.66.0F.W0 7F /r + + AVX512VL + AVX512F + + Move aligned packed doubleword integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQA32 + ymm2/m256 {k1}{z},ymm1 + EVEX.256.66.0F.W0 7F /r + + AVX512VL + AVX512F + + Move aligned packed doubleword integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQA32 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.66.0F.W0 7F /r + + AVX512F + + Move aligned packed doubleword integer values from zmm1 to zmm2/m512 using writemask k1. + + + VMOVDQA64 + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F.W1 6F /r + + AVX512VL + AVX512F + + Move aligned quadword integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQA64 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F.W1 6F /r + + AVX512VL + AVX512F + + Move aligned quadword integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQA64 + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F.W1 6F /r + + AVX512F + + Move aligned packed quadword integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQA64 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.66.0F.W1 7F /r + + AVX512VL + AVX512F + + Move aligned packed quadword integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQA64 + ymm2/m256 {k1}{z},ymm1 + EVEX.256.66.0F.W1 7F /r + + AVX512VL + AVX512F + + Move aligned packed quadword integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQA64 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.66.0F.W1 7F /r + + AVX512F + + Move aligned packed quadword integer values from zmm1 to zmm2/m512 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVDQU,VMOVDQU8/16/32/64--Move Unaligned Packed Integer Values. + + MOVDQU + xmm1,xmm2/m128 + F3 0F 6F /r + + SSE2 + + Move unaligned packed integer values from xmm2/m128 to xmm1. + + + MOVDQU + xmm2/m128,xmm1 + F3 0F 7F /r + + SSE2 + + Move unaligned packed integer values from xmm1 to xmm2/m128. + + + VMOVDQU + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 6F /r + + AVX + + Move unaligned packed integer values from xmm2/m128 to xmm1. + + + VMOVDQU + xmm2/m128,xmm1 + VEX.128.F3.0F.WIG 7F /r + + AVX + + Move unaligned packed integer values from xmm1 to xmm2/m128. + + + VMOVDQU + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 6F /r + + AVX + + Move unaligned packed integer values from ymm2/m256 to ymm1. + + + VMOVDQU + ymm2/m256,ymm1 + VEX.256.F3.0F.WIG 7F /r + + AVX + + Move unaligned packed integer values from ymm1 to ymm2/m256. + + + VMOVDQU8 + xmm1 {k1}{z},xmm2/m128 + EVEX.128.F2.0F.W0 6F /r + + AVX512VL + AVX512BW + + Move unaligned packed byte integer values from xmm2/m128 to xmm1 using writemask k1. 
+ + + VMOVDQU8 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F2.0F.W0 6F /r + + AVX512VL + AVX512BW + + Move unaligned packed byte integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQU8 + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F2.0F.W0 6F /r + + AVX512BW + + Move unaligned packed byte integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQU8 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.F2.0F.W0 7F /r + + AVX512VL + AVX512BW + + Move unaligned packed byte integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQU8 + ymm2/m256 {k 1}{z},ymm1 + EVEX.256.F2.0F.W0 7F /r + + AVX512VL + AVX512BW + + Move unaligned packed byte integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQU8 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.F2.0F.W0 7F /r + + AVX512BW + + Move unaligned packed byte integer values from zmm1 to zmm2/m512 using writemask k1. + + + VMOVDQU16 + xmm1 {k1}{z},xmm2/m128 + EVEX.128.F2.0F.W1 6F /r + + AVX512VL + AVX512BW + + Move unaligned packed word integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQU16 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F2.0F.W1 6F /r + + AVX512VL + AVX512BW + + Move unaligned packed word integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQU16 + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F2.0F.W1 6F /r + + AVX512BW + + Move unaligned packed word integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQU16 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.F2.0F.W1 7F /r + + AVX512VL + AVX512BW + + Move unaligned packed word integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQU16 + ymm2/m256 {k1}{z},ymm1 + EVEX.256.F2.0F.W1 7F /r + + AVX512VL + AVX512BW + + Move unaligned packed word integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQU16 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.F2.0F.W1 7F /r + + AVX512BW + + Move unaligned packed word integer values from zmm1 to zmm2/m512 using writemask k1. + + + VMOVDQU32 + xmm1 {k1}{z},xmm2/mm128 + EVEX.128.F3.0F.W0 6F /r + + AVX512VL + AVX512F + + Move unaligned packed doubleword integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQU32 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F3.0F.W0 6F /r + + AVX512VL + AVX512F + + Move unaligned packed doubleword integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQU32 + zmm1 {k 1}{z},zmm2/m512 + EVEX.512.F3.0F.W0 6F /r + + AVX512F + + Move unaligned packed doubleword integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQU32 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.F3.0F.W0 7F /r + + AVX512VL + AVX512F + + Move unaligned packed doubleword integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQU32 + ymm2/m256 {k1}{z},ymm1 + EVEX.256.F3.0F.W0 7F /r + + AVX512VL + AVX512F + + Move unaligned packed doubleword integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQU32 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.F3.0F.W0 7F /r + + AVX512F + + Move unaligned packed doubleword integer values from zmm1 to zmm2/m512 using writemask k1. + + + VMOVDQU64 + xmm1 {k1}{z},xmm2/m128 + EVEX.128.F3.0F.W1 6F /r + + AVX512VL + AVX512F + + Move unaligned packed quadword integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQU64 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F3.0F.W1 6F /r + + AVX512VL + AVX512F + + Move unaligned packed quadword integer values from ymm2/m256 to ymm1 using writemask k1. 
+ + + VMOVDQU64 + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F3.0F.W1 6F /r + + AVX512F + + Move unaligned packed quadword integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQU64 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.F3.0F.W1 7F /r + + AVX512VL + AVX512F + + Move unaligned packed quadword integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQU64 + ymm2/m256 {k1}{z},ymm1 + EVEX.256.F3.0F.W1 7F /r + + AVX512VL + AVX512F + + Move unaligned packed quadword integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQU64 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.F3.0F.W1 7F /r + + AVX512F + + Move unaligned packed quadword integer values from zmm1 to zmm2/m512 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVHLPS--Move Packed Single-Precision Floating-Point Values High to Low. + + MOVHLPS + xmm1,xmm2 + 0F 12 /r + + SSE + + Move two packed single-precision floating-point values from high quadword of xmm2 to low quadword of xmm1. + + + VMOVHLPS + xmm1,xmm2,xmm3 + VEX.NDS.128.0F.WIG 12 /r + + AVX + + Merge two packed single-precision floating-point values from high quadword of xmm3 and low quadword of xmm2. + + + VMOVHLPS + xmm1,xmm2,xmm3 + EVEX.NDS.128.0F.W0 12 /r + + AVX512F + + Merge two packed single-precision floating-point values from high quadword of xmm3 and low quadword of xmm2. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + vvvv(r) + ModRM:r/m(r) + NA + + + + MOVHPD--Move High Packed Double-Precision Floating-Point Value. + + MOVHPD + xmm1,m64 + 66 0F 16 /r + + SSE2 + + Move double-precision floating-point value from m64 to high quadword of xmm1. + + + VMOVHPD + xmm2,xmm1,m64 + VEX.NDS.128.66.0F.WIG 16 /r + + AVX + + Merge double-precision floating-point value from m64 and the low quadword of xmm1. + + + VMOVHPD + xmm2,xmm1,m64 + EVEX.NDS.128.66.0F.W1 16 /r + + AVX512F + + Merge double-precision floating-point value from m64 and the low quadword of xmm1. + + + MOVHPD + m64,xmm1 + 66 0F 17 /r + + SSE2 + + Move double-precision floating-point value from high quadword of xmm1 to m64. + + + VMOVHPD + m64,xmm1 + VEX.128.66.0F.WIG 17 /r + + AVX + + Move double-precision floating-point value from high quadword of xmm1 to m64. + + + VMOVHPD + m64,xmm1 + EVEX.128.66.0F.W1 17 /r + + AVX512F + + Move double-precision floating-point value from high quadword of xmm1 to m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVHPS--Move High Packed Single-Precision Floating-Point Values. + + MOVHPS + xmm1,m64 + 0F 16 /r + + SSE + + Move two packed single-precision floating-point values from m64 to high quadword of xmm1. + + + VMOVHPS + xmm2,xmm1,m64 + VEX.NDS.128.0F.WIG 16 /r + + AVX + + Merge two packed single-precision floating-point values from m64 and the low quadword of xmm1. + + + VMOVHPS + xmm2,xmm1,m64 + EVEX.NDS.128.0F.W0 16 /r + + AVX512F + + Merge two packed single-precision floating-point values from m64 and the low quadword of xmm1. + + + MOVHPS + m64,xmm1 + 0F 17 /r + + SSE + + Move two packed single-precision floating-point values from high quadword of xmm1 to m64. 
+ + + VMOVHPS + m64,xmm1 + VEX.128.0F.WIG 17 /r + + AVX + + Move two packed single-precision floating-point values from high quadword of xmm1 to m64. + + + VMOVHPS + m64,xmm1 + EVEX.128.0F.W0 17 /r + + AVX512F + + Move two packed single-precision floating-point values from high quadword of xmm1 to m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVLHPS--Move Packed Single-Precision Floating-Point Values Low to High. + + MOVLHPS + xmm1,xmm2 + 0F 16 /r + + SSE + + Move two packed single-precision floating-point values from low quadword of xmm2 to high quadword of xmm1. + + + VMOVLHPS + xmm1,xmm2,xmm3 + VEX.NDS.128.0F.WIG 16 /r + + AVX + + Merge two packed single-precision floating-point values from low quadword of xmm3 and low quadword of xmm2. + + + VMOVLHPS + xmm1,xmm2,xmm3 + EVEX.NDS.128.0F.W0 16 /r + + AVX512F + + Merge two packed single-precision floating-point values from low quadword of xmm3 and low quadword of xmm2. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + vvvv(r) + ModRM:r/m(r) + NA + + + + MOVLPD--Move Low Packed Double-Precision Floating-Point Value. + + MOVLPD + xmm1,m64 + 66 0F 12 /r + + SSE2 + + Move double-precision floating-point value from m64 to low quadword of xmm1. + + + VMOVLPD + xmm2,xmm1,m64 + VEX.NDS.128.66.0F.WIG 12 /r + + AVX + + Merge double-precision floating-point value from m64 and the high quadword of xmm1. + + + VMOVLPD + xmm2,xmm1,m64 + EVEX.NDS.128.66.0F.W1 12 /r + + AVX512F + + Merge double-precision floating-point value from m64 and the high quadword of xmm1. + + + MOVLPD + m64,xmm1 + 66 0F 13/r + + SSE2 + + Move double-precision floating-point value from low quadword of xmm1 to m64. + + + VMOVLPD + m64,xmm1 + VEX.128.66.0F.WIG 13/r + + AVX + + Move double-precision floating-point value from low quadword of xmm1 to m64. + + + VMOVLPD + m64,xmm1 + EVEX.128.66.0F.W1 13/r + + AVX512F + + Move double-precision floating-point value from low quadword of xmm1 to m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(r) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVLPS--Move Low Packed Single-Precision Floating-Point Values. + + MOVLPS + xmm1,m64 + 0F 12 /r + + SSE + + Move two packed single-precision floating-point values from m64 to low quadword of xmm1. + + + VMOVLPS + xmm2,xmm1,m64 + VEX.NDS.128.0F.WIG 12 /r + + AVX + + Merge two packed single-precision floating-point values from m64 and the high quadword of xmm1. + + + VMOVLPS + xmm2,xmm1,m64 + EVEX.NDS.128.0F.W0 12 /r + + AVX512F + + Merge two packed single-precision floating-point values from m64 and the high quadword of xmm1. + + + MOVLPS + m64,xmm1 + 0F 13/r + + SSE + + Move two packed single-precision floating-point values from low quadword of xmm1 to m64. + + + VMOVLPS + m64,xmm1 + VEX.128.0F.WIG 13/r + + AVX + + Move two packed single-precision floating-point values from low quadword of xmm1 to m64. + + + VMOVLPS + m64,xmm1 + EVEX.128.0F.W0 13/r + + AVX512F + + Move two packed single-precision floating-point values from low quadword of xmm1 to m64. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVNTDQA--Load Double Quadword Non-Temporal Aligned Hint. + + MOVNTDQA + xmm1,m128 + 66 0F 38 2A /r + + SSE4_1 + + Move double quadword from m128 to xmm1 using nontemporal hint if WC memory type. + + + VMOVNTDQA + xmm1,m128 + VEX.128.66.0F38.WIG 2A /r + + AVX + + Move double quadword from m128 to xmm using nontemporal hint if WC memory type. + + + VMOVNTDQA + ymm1,m256 + VEX.256.66.0F38.WIG 2A /r + + AVX2 + + Move 256-bit data from m256 to ymm using non-temporal hint if WC memory type. + + + VMOVNTDQA + xmm1,m128 + EVEX.128.66.0F38 2A /r + + AVX512VL + AVX512F + + Move 128-bit data from m128 to xmm using non-temporal hint if WC memory type. + + + VMOVNTDQA + ymm1,m256 + EVEX.256.66.0F38 2A /r + + AVX512VL + AVX512F + + Move 256-bit data from m256 to ymm using non-temporal hint if WC memory type. + + + VMOVNTDQA + zmm1,m512 + EVEX.512.66.0F38.W0 2A /r + + AVX512F + + Move 512-bit data from m512 to zmm using non-temporal hint if WC memory type. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVNTDQ--Store Packed Integers Using Non-Temporal Hint. + + MOVNTDQ + m128,xmm1 + 66 0F E7 /r + + SSE2 + + Move packed integer values in xmm1 to m128 using nontemporal hint. + + + VMOVNTDQ + m128,xmm1 + VEX.128.66.0F.WIG E7 /r + + AVX + + Move packed integer values in xmm1 to m128 using nontemporal hint. + + + VMOVNTDQ + m256,ymm1 + VEX.256.66.0F.WIG E7 /r + + AVX + + Move packed integer values in ymm1 to m256 using nontemporal hint. + + + VMOVNTDQ + m128,xmm1 + EVEX.128.66.0F E7 /r + + AVX512VL + AVX512F + + Move packed integer values in xmm1 to m128 using nontemporal hint. + + + VMOVNTDQ + m256,ymm1 + EVEX.256.66.0F E7 /r + + AVX512VL + AVX512F + + Move packed integer values in zmm1 to m256 using nontemporal hint. + + + VMOVNTDQ + m512,zmm1 + EVEX.512.66.0F.W0 E7 /r + + AVX512F + + Move packed integer values in zmm1 to m512 using nontemporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVNTPD--Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint. + + MOVNTPD + m128,xmm1 + 66 0F 2B /r + + SSE2 + + Move packed double-precision values in xmm1 to m128 using non-temporal hint. + + + VMOVNTPD + m128,xmm1 + VEX.128.66.0F.WIG 2B /r + + AVX + + Move packed double-precision values in xmm1 to m128 using non-temporal hint. + + + VMOVNTPD + m256,ymm1 + VEX.256.66.0F.WIG 2B /r + + AVX + + Move packed double-precision values in ymm1 to m256 using non-temporal hint. + + + VMOVNTPD + m128,xmm1 + EVEX.128.66.0F.W1 2B /r + + AVX512VL + AVX512F + + Move packed double-precision values in xmm1 to m128 using non-temporal hint. + + + VMOVNTPD + m256,ymm1 + EVEX.256.66.0F.W1 2B /r + + AVX512VL + AVX512F + + Move packed double-precision values in ymm1 to m256 using non-temporal hint. + + + VMOVNTPD + m512,zmm1 + EVEX.512.66.0F.W1 2B /r + + AVX512F + + Move packed double-precision values in zmm1 to m512 using non-temporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVNTPS--Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint. + + MOVNTPS + m128,xmm1 + 0F 2B /r + + SSE + + Move packed single-precision values xmm1 to mem using non-temporal hint. 
+ + + VMOVNTPS + m128,xmm1 + VEX.128.0F.WIG 2B /r + + AVX + + Move packed single-precision values xmm1 to mem using non-temporal hint. + + + VMOVNTPS + m256,ymm1 + VEX.256.0F.WIG 2B /r + + AVX + + Move packed single-precision values ymm1 to mem using non-temporal hint. + + + VMOVNTPS + m128,xmm1 + EVEX.128.66.0F.W0 2B /r + + AVX512VL + AVX512F + + Move packed single-precision values in xmm1 to m128 using non-temporal hint. + + + VMOVNTPS + m256,ymm1 + EVEX.256.66.0F.W0 2B /r + + AVX512VL + AVX512F + + Move packed single-precision values in ymm1 to m256 using non-temporal hint. + + + VMOVNTPS + m512,zmm1 + EVEX.512.0F.W0 2B /r + + AVX512F + + Move packed single-precision values in zmm1 to m512 using non-temporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVSD--Move or Merge Scalar Double-Precision Floating-Point Value. + + MOVSD + xmm1,xmm2 + F2 0F 10 /r + + SSE2 + + Move scalar double-precision floating-point value from xmm2 to xmm1 register. + + + MOVSD + xmm1,m64 + F2 0F 10 /r + + SSE2 + + Load scalar double-precision floating-point value from m64 to xmm1 register. + + + MOVSD + xmm1/m64,xmm2 + F2 0F 11 /r + + SSE2 + + Move scalar double-precision floating-point value from xmm2 register to xmm1/m64. + + + VMOVSD + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F2.0F.WIG 10 /r + + AVX + + Merge scalar double-precision floating-point value from xmm2 and xmm3 to xmm1 register. + + + VMOVSD + xmm1,m64 + VEX.LIG.F2.0F.WIG 10 /r + + AVX + + Load scalar double-precision floating-point value from m64 to xmm1 register. + + + VMOVSD + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F2.0F.WIG 11 /r + + AVX + + Merge scalar double-precision floating-point value from xmm2 and xmm3 registers to xmm1. + + + VMOVSD + m64,xmm1 + VEX.LIG.F2.0F.WIG 11 /r + + AVX + + Store scalar double-precision floating-point value from xmm1 register to m64. + + + VMOVSD + xmm1 {k1}{z},xmm2,xmm3 + EVEX.NDS.LIG.F2.0F.W1 10 /r + + AVX512F + + Merge scalar double-precision floating-point value from xmm2 and xmm3 registers to xmm1 under writemask k1. + + + VMOVSD + xmm1 {k1}{z},m64 + EVEX.LIG.F2.0F.W1 10 /r + + AVX512F + + Load scalar double-precision floating-point value from m64 to xmm1 register under writemask k1. + + + VMOVSD + xmm1 {k1}{z},xmm2,xmm3 + EVEX.NDS.LIG.F2.0F.W1 11 /r + + AVX512F + + Merge scalar double-precision floating-point value from xmm2 and xmm3 registers to xmm1 under writemask k1. + + + VMOVSD + m64 {k1},xmm1 + EVEX.LIG.F2.0F.W1 11 /r + + AVX512F + + Store scalar double-precision floating-point value from xmm1 register to m64 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + vvvv(r) + ModRM:reg(r) + NA + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVSHDUP--Replicate Single FP Values. + + MOVSHDUP + xmm1,xmm2/m128 + F3 0F 16 /r + + SSE3 + + Move odd index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1. + + + VMOVSHDUP + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 16 /r + + AVX + + Move odd index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1. + + + VMOVSHDUP + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 16 /r + + AVX + + Move odd index single-precision floating-point values from ymm2/mem and duplicate each element into ymm1. 
+ + + VMOVSHDUP + xmm1 {k1}{z},xmm2/m128 + EVEX.128.F3.0F.W0 16 /r + + AVX512VL + AVX512F + + Move odd index single-precision floating-point values from xmm2/m128 and duplicate each element into xmm1 under writemask. + + + VMOVSHDUP + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F3.0F.W0 16 /r + + AVX512VL + AVX512F + + Move odd index single-precision floating-point values from ymm2/m256 and duplicate each element into ymm1 under writemask. + + + VMOVSHDUP + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F3.0F.W0 16 /r + + AVX512F + + Move odd index single-precision floating-point values from zmm2/m512 and duplicate each element into zmm1 under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVSLDUP--Replicate Single FP Values. + + MOVSLDUP + xmm1,xmm2/m128 + F3 0F 12 /r + + SSE3 + + Move even index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1. + + + VMOVSLDUP + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 12 /r + + AVX + + Move even index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1. + + + VMOVSLDUP + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 12 /r + + AVX + + Move even index single-precision floating-point values from ymm2/mem and duplicate each element into ymm1. + + + VMOVSLDUP + xmm1 {k1}{z},xmm2/m128 + EVEX.128.F3.0F.W0 12 /r + + AVX512VL + AVX512F + + Move even index single-precision floating-point values from xmm2/m128 and duplicate each element into xmm1 under writemask. + + + VMOVSLDUP + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F3.0F.W0 12 /r + + AVX512VL + AVX512F + + Move even index single-precision floating-point values from ymm2/m256 and duplicate each element into ymm1 under writemask. + + + VMOVSLDUP + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F3.0F.W0 12 /r + + AVX512F + + Move even index single-precision floating-point values from zmm2/m512 and duplicate each element into zmm1 under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVSS--Move or Merge Scalar Single-Precision Floating-Point Value. + + MOVSS + xmm1,xmm2 + F3 0F 10 /r + + SSE + + Merge scalar single-precision floating-point value from xmm2 to xmm1 register. + + + MOVSS + xmm1,m32 + F3 0F 10 /r + + SSE + + Load scalar single-precision floating-point value from m32 to xmm1 register. + + + VMOVSS + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F3.0F.WIG 10 /r + + AVX + + Merge scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register. + + + VMOVSS + xmm1,m32 + VEX.LIG.F3.0F.WIG 10 /r + + AVX + + Load scalar single-precision floating-point value from m32 to xmm1 register. + + + MOVSS + xmm2/m32,xmm1 + F3 0F 11 /r + + SSE + + Move scalar single-precision floating-point value from xmm1 register to xmm2/m32. + + + VMOVSS + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F3.0F.WIG 11 /r + + AVX + + Move scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register. + + + VMOVSS + m32,xmm1 + VEX.LIG.F3.0F.WIG 11 /r + + AVX + + Move scalar single-precision floating-point value from xmm1 register to m32. + + + VMOVSS + xmm1 {k1}{z},xmm2,xmm3 + EVEX.NDS.LIG.F3.0F.W0 10 /r + + AVX512F + + Move scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register under writemask k1. + + + VMOVSS + xmm1 {k1}{z},m32 + EVEX.LIG.F3.0F.W0 10 /r + + AVX512F + + Move scalar single-precision floating-point values from m32 to xmm1 under writemask k1. 
+ + + VMOVSS + xmm1 {k1}{z},xmm2,xmm3 + EVEX.NDS.LIG.F3.0F.W0 11 /r + + AVX512F + + Move scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register under writemask k1. + + + VMOVSS + m32 {k1},xmm1 + EVEX.LIG.F3.0F.W0 11 /r + + AVX512F + + Move scalar single-precision floating-point values from xmm1 to m32 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + vvvv(r) + ModRM:reg(r) + NA + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVUPD--Move Unaligned Packed Double-Precision Floating-Point Values. + + MOVUPD + xmm1,xmm2/m128 + 66 0F 10 /r + + SSE2 + + Move unaligned packed double-precision floatingpoint from xmm2/mem to xmm1. + + + MOVUPD + xmm2/m128,xmm1 + 66 0F 11 /r + + SSE2 + + Move unaligned packed double-precision floatingpoint from xmm1 to xmm2/mem. + + + VMOVUPD + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 10 /r + + AVX + + Move unaligned packed double-precision floatingpoint from xmm2/mem to xmm1. + + + VMOVUPD + xmm2/m128,xmm1 + VEX.128.66.0F.WIG 11 /r + + AVX + + Move unaligned packed double-precision floatingpoint from xmm1 to xmm2/mem. + + + VMOVUPD + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 10 /r + + AVX + + Move unaligned packed double-precision floatingpoint from ymm2/mem to ymm1. + + + VMOVUPD + ymm2/m256,ymm1 + VEX.256.66.0F.WIG 11 /r + + AVX + + Move unaligned packed double-precision floatingpoint from ymm1 to ymm2/mem. + + + VMOVUPD + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F.W1 10 /r + + AVX512VL + AVX512F + + Move unaligned packed double-precision floatingpoint from xmm2/m128 to xmm1 using writemask k1. + + + VMOVUPD + xmm2/m128 {k1}{z},xmm1 + EVEX.128.66.0F.W1 11 /r + + AVX512VL + AVX512F + + Move unaligned packed double-precision floatingpoint from xmm1 to xmm2/m128 using writemask k1. + + + VMOVUPD + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F.W1 10 /r + + AVX512VL + AVX512F + + Move unaligned packed double-precision floatingpoint from ymm2/m256 to ymm1 using writemask k1. + + + VMOVUPD + ymm2/m256 {k1}{z},ymm1 + EVEX.256.66.0F.W1 11 /r + + AVX512VL + AVX512F + + Move unaligned packed double-precision floatingpoint from ymm1 to ymm2/m256 using writemask k1. + + + VMOVUPD + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F.W1 10 /r + + AVX512F + + Move unaligned packed double-precision floatingpoint values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVUPD + zmm2/m512 {k1}{z},zmm1 + EVEX.512.66.0F.W1 11 /r + + AVX512F + + Move unaligned packed double-precision floatingpoint values from zmm1 to zmm2/m512 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVUPS--Move Unaligned Packed Single-Precision Floating-Point Values. + + MOVUPS + xmm1,xmm2/m128 + 0F 10 /r + + SSE + + Move unaligned packed single-precision floating-point from xmm2/mem to xmm1. + + + MOVUPS + xmm2/m128,xmm1 + 0F 11 /r + + SSE + + Move unaligned packed single-precision floating-point from xmm1 to xmm2/mem. + + + VMOVUPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 10 /r + + AVX + + Move unaligned packed single-precision floating-point from xmm2/mem to xmm1. + + + VMOVUPS + xmm2/m128,xmm1 + VEX.128.0F 11.WIG /r + + AVX + + Move unaligned packed single-precision floating-point from xmm1 to xmm2/mem. 
+ + + VMOVUPS + ymm1,ymm2/m256 + VEX.256.0F 10.WIG /r + + AVX + + Move unaligned packed single-precision floating-point from ymm2/mem to ymm1. + + + VMOVUPS + ymm2/m256,ymm1 + VEX.256.0F 11.WIG /r + + AVX + + Move unaligned packed single-precision floating-point from ymm1 to ymm2/mem. + + + VMOVUPS + xmm1 {k1}{z},xmm2/m128 + EVEX.128.0F.W0 10 /r + + AVX512VL + AVX512F + + Move unaligned packed single-precision floating-point values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVUPS + ymm1 {k1}{z},ymm2/m256 + EVEX.256.0F.W0 10 /r + + AVX512VL + AVX512F + + Move unaligned packed single-precision floating-point values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVUPS + zmm1 {k1}{z},zmm2/m512 + EVEX.512.0F.W0 10 /r + + AVX512F + + Move unaligned packed single-precision floating-point values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVUPS + xmm2/m128 {k 1}{z},xmm1 + EVEX.128.0F.W0 11 /r + + AVX512VL + AVX512F + + Move unaligned packed single-precision floating-point values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVUPS + ymm2/m256 {k1}{z},ymm1 + EVEX.256.0F.W0 11 /r + + AVX512VL + AVX512F + + Move unaligned packed single-precision floating-point values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVUPS + zmm2/m512 {k1}{z},zmm1 + EVEX.512.0F.W0 11 /r + + AVX512F + + Move unaligned packed single-precision floating-point values from zmm1 to zmm2/m512 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + PSADBW--Compute Sum of Absolute Differences. + + PSADBW + xmm1,xmm2/m128 + 66 0F F6 /r + + SSE2 + + Computes the absolute differences of the packed unsigned byte integers from xmm2 /m128 and xmm1; the 8 low differences and 8 high differences are then summed separately to produce two unsigned word integer results. + + + VPSADBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F F6 /r + + AVX + + Computes the absolute differences of the packed unsigned byte integers from xmm3 /m128 and xmm2; the 8 low differences and 8 high differences are then summed separately to produce two unsigned word integer results. + + + VPSADBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F F6 /r + + AVX2 + + Computes the absolute differences of the packed unsigned byte integers from ymm3 /m256 and ymm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results. + + + VPSADBW + xmm1,xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG F6 /r + + AVX512VL + AVX512BW + + Computes the absolute differences of the packed unsigned byte integers from xmm3 /m128 and xmm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results. + + + VPSADBW + ymm1,ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG F6 /r + + AVX512VL + AVX512BW + + Computes the absolute differences of the packed unsigned byte integers from ymm3 /m256 and ymm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results. + + + VPSADBW + zmm1,zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG F6 /r + + AVX512BW + + Computes the absolute differences of the packed unsigned byte integers from zmm3 /m512 and zmm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MULPD--Multiply Packed Double-Precision Floating-Point Values. + + MULPD + xmm1,xmm2/m128 + 66 0F 59 /r + + SSE2 + + Multiply packed double-precision floating-point values in xmm2/m128 with xmm1 and store result in xmm1. + + + VMULPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 59 /r + + AVX + + Multiply packed double-precision floating-point values in xmm3/m128 with xmm2 and store result in xmm1. + + + VMULPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 59 /r + + AVX + + Multiply packed double-precision floating-point values in ymm3/m256 with ymm2 and store result in ymm1. + + + VMULPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 59 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm3/m128/m64bcst to xmm2 and store result in xmm1. + + + VMULPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 59 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm3/m256/m64bcst to ymm2 and store result in ymm1. + + + VMULPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F.W1 59 /r + + AVX512F + + Multiply packed double-precision floating-point values in zmm3/m512/m64bcst with zmm2 and store result in zmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MULPS--Multiply Packed Single-Precision Floating-Point Values. + + MULPS + xmm1,xmm2/m128 + 0F 59 /r + + SSE + + Multiply packed single-precision floating-point values in xmm2/m128 with xmm1 and store result in xmm1. + + + VMULPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 59 /r + + AVX + + Multiply packed single-precision floating-point values in xmm3/m128 with xmm2 and store result in xmm1. + + + VMULPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 59 /r + + AVX + + Multiply packed single-precision floating-point values in ymm3/m256 with ymm2 and store result in ymm1. + + + VMULPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 59 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm3/m128/m32bcst to xmm2 and store result in xmm1. + + + VMULPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 59 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm3/m256/m32bcst to ymm2 and store result in ymm1. + + + VMULPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst {er} + EVEX.NDS.512.0F.W0 59 /r + + AVX512F + + Multiply packed single-precision floating-point values in zmm3/m512/m32bcst with zmm2 and store result in zmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MULSD--Multiply Scalar Double-Precision Floating-Point Value. + + MULSD + xmm1,xmm2/m64 + F2 0F 59 /r + + SSE2 + + Multiply the low double-precision floating-point value in xmm2/m64 by low double-precision floating-point value in xmm1. + + + VMULSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 59 /r + + AVX + + Multiply the low double-precision floating-point value in xmm3/m64 by low double-precision floating-point value in xmm2. 
+ + + VMULSD + xmm1 {k1}{z},xmm2,xmm3/m64 {er} + EVEX.NDS.LIG.F2.0F.W1 59 /r + + AVX512F + + Multiply the low double-precision floating-point value in xmm3/m64 by low double-precision floating-point value in xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MULSS--Multiply Scalar Single-Precision Floating-Point Values. + + MULSS + xmm1,xmm2/m32 + F3 0F 59 /r + + SSE + + Multiply the low single-precision floating-point value in xmm2/m32 by the low single-precision floating-point value in xmm1. + + + VMULSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 59 /r + + AVX + + Multiply the low single-precision floating-point value in xmm3/m32 by the low single-precision floating-point value in xmm2. + + + VMULSS + xmm1 {k1}{z},xmm2,xmm3/m32 {er} + EVEX.NDS.LIG.F3.0F.W0 59 /r + + AVX512F + + Multiply the low single-precision floating-point value in xmm3/m32 by the low single-precision floating-point value in xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ORPD--Bitwise Logical OR of Packed Double Precision Floating-Point Values. + + ORPD + xmm1,xmm2/m128 + 66 0F 56/r + + SSE2 + + Return the bitwise logical OR of packed double-precision floating-point values in xmm1 and xmm2/mem. + + + VORPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 56 /r + + AVX + + Return the bitwise logical OR of packed double-precision floating-point values in xmm2 and xmm3/mem. + + + VORPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 56 /r + + AVX + + Return the bitwise logical OR of packed double-precision floating-point values in ymm2 and ymm3/mem. + + + VORPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 56 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical OR of packed double-precision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1. + + + VORPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 56 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical OR of packed double-precision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1. + + + VORPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 56 /r + + AVX512DQ + + Return the bitwise logical OR of packed double-precision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ORPS--Bitwise Logical OR of Packed Single Precision Floating-Point Values. + + ORPS + xmm1,xmm2/m128 + 0F 56 /r + + SSE + + Return the bitwise logical OR of packed single-precision floating-point values in xmm1 and xmm2/mem. + + + VORPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F 56 /r + + AVX + + Return the bitwise logical OR of packed single-precision floating-point values in xmm2 and xmm3/mem. + + + VORPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F 56 /r + + AVX + + Return the bitwise logical OR of packed single-precision floating-point values in ymm2 and ymm3/mem. + + + VORPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 56 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical OR of packed single-precision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1. 
+ + + VORPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 56 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical OR of packed single-precision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1. + + + VORPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 56 /r + + AVX512DQ + + Return the bitwise logical OR of packed single-precision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PABSB/PABSW/PABSD/PABSQ--Packed Absolute Value. + + PABSB + xmm1,xmm2/m128 + 66 0F 38 1C /r + + SSSE3 + + Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1. + + + PABSW + xmm1,xmm2/m128 + 66 0F 38 1D /r + + SSSE3 + + Compute the absolute value of 16-bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + PABSD + xmm1,xmm2/m128 + 66 0F 38 1E /r + + SSSE3 + + Compute the absolute value of 32-bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSB + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 1C /r + + AVX + + Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSW + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 1D /r + + AVX + + Compute the absolute value of 16-bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSD + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 1E /r + + AVX + + Compute the absolute value of 32-bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSB + ymm1,ymm2/m256 + VEX.256.66.0F38.WIG 1C /r + + AVX2 + + Compute the absolute value of bytes in ymm2/m256 and store UNSIGNED result in ymm1. + + + VPABSW + ymm1,ymm2/m256 + VEX.256.66.0F38.WIG 1D /r + + AVX2 + + Compute the absolute value of 16-bit integers in ymm2/m256 and store UNSIGNED result in ymm1. + + + VPABSD + ymm1,ymm2/m256 + VEX.256.66.0F38.WIG 1E /r + + AVX2 + + Compute the absolute value of 32-bit integers in ymm2/m256 and store UNSIGNED result in ymm1. + + + VPABSB + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F 38 1C /r + + AVX512VL + AVX512BW + + Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1 using writemask k1. + + + VPABSB + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F 38 1C /r + + AVX512VL + AVX512BW + + Compute the absolute value of bytes in ymm2/m256 and store UNSIGNED result in ymm1 using writemask k1. + + + VPABSB + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F 38 1C /r + + AVX512BW + + Compute the absolute value of bytes in zmm2/m512 and store UNSIGNED result in zmm1 using writemask k1. + + + VPABSW + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F 38 1D /r + + AVX512VL + AVX512BW + + Compute the absolute value of 16-bit integers in xmm2/m128 and store UNSIGNED result in xmm1 using writemask k1. + + + VPABSW + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F 38 1D /r + + AVX512VL + AVX512BW + + Compute the absolute value of 16-bit integers in ymm2/m256 and store UNSIGNED result in ymm1 using writemask k1. + + + VPABSW + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F 38 1D /r + + AVX512BW + + Compute the absolute value of 16-bit integers in zmm2/m512 and store UNSIGNED result in zmm1 using writemask k1. + + + VPABSD + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F.W0 38 1E /r + + AVX512VL + AVX512F + + Compute the absolute value of 32-bit integers in xmm2/m128/m32bcst and store UNSIGNED result in xmm1 using writemask k1. 
+ + + VPABSD + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F.W0 38 1E /r + + AVX512VL + AVX512F + + Compute the absolute value of 32-bit integers in ymm2/m256/m32bcst and store UNSIGNED result in ymm1 using writemask k1. + + + VPABSD + zmm1 {k1}{z},zmm2/m512/m32bcst + EVEX.512.66.0F38.W0 1E /r + + AVX512F + + Compute the absolute value of 32-bit integers in zmm2/m512/m32bcst and store UNSIGNED result in zmm1 using writemask k1. + + + VPABSQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 38 1F /r + + AVX512VL + AVX512F + + Compute the absolute value of 64-bit integers in xmm2/m128/m64bcst and store UNSIGNED result in xmm1 using writemask k1. + + + VPABSQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 38 1F /r + + AVX512VL + AVX512F + + Compute the absolute value of 64-bit integers in ymm2/m256/m64bcst and store UNSIGNED result in ymm1 using writemask k1. + + + VPABSQ + zmm1 {k1}{z},zmm2/m512/m64bcst + EVEX.512.66.0F38.W1 1F /r + + AVX512F + + Compute the absolute value of 64-bit integers in zmm2/m512/m64bcst and store UNSIGNED result in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PACKSSWB/PACKSSDW--Pack with Signed Saturation. + + PACKSSWB + xmm1,xmm2/m128 + 66 0F 63 /r + + SSE2 + + Converts 8 packed signed word integers from xmm1 and from xxm2/m128 into 16 packed signed byte integers in xmm1 using signed saturation. + + + PACKSSDW + xmm1,xmm2/m128 + 66 0F 6B /r + + SSE2 + + Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed word integers in xmm1 using signed saturation. + + + VPACKSSWB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 63 /r + + AVX + + Converts 8 packed signed word integers from xmm2 and from xmm3/m128 into 16 packed signed byte integers in xmm1 using signed saturation. + + + VPACKSSDW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 6B /r + + AVX + + Converts 4 packed signed doubleword integers from xmm2 and from xmm3/m128 into 8 packed signed word integers in xmm1 using signed saturation. + + + VPACKSSWB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 63 /r + + AVX2 + + Converts 16 packed signed word integers from ymm2 and from ymm3/m256 into 32 packed signed byte integers in ymm1 using signed saturation. + + + VPACKSSDW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 6B /r + + AVX2 + + Converts 8 packed signed doubleword integers from ymm2 and from ymm3/m256 into 16 packed signed word integers in ymm1 using signed saturation. + + + VPACKSSWB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 63 /r + + AVX512VL + AVX512BW + + Converts packed signed word integers from xmm2 and from xmm3/m128 into packed signed byte integers in xmm1 using signed saturation under writemask k1. + + + VPACKSSWB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 63 /r + + AVX512VL + AVX512BW + + Converts packed signed word integers from ymm2 and from ymm3/m256 into packed signed byte integers in ymm1 using signed saturation under writemask k1. + + + VPACKSSWB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 63 /r + + AVX512BW + + Converts packed signed word integers from zmm2 and from zmm3/m512 into packed signed byte integers in zmm1 using signed saturation under writemask k1. 
+ + + VPACKSSDW + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 6B /r + + AVX512VL + AVX512BW + + Converts packed signed doubleword integers from xmm2 and from xmm3/m128/m32bcst into packed signed word integers in xmm1 using signed saturation under writemask k1. + + + VPACKSSDW + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 6B /r + + AVX512VL + AVX512BW + + Converts packed signed doubleword integers from ymm2 and from ymm3/m256/m32bcst into packed signed word integers in ymm1 using signed saturation under writemask k1. + + + VPACKSSDW + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 6B /r + + AVX512BW + + Converts packed signed doubleword integers from zmm2 and from zmm3/m512/m32bcst into packed signed word integers in zmm1 using signed saturation under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PACKUSDW--Pack with Unsigned Saturation. + + PACKUSDW + xmm1,xmm2/m128 + 66 0F 38 2B /r + + SSE4_1 + + Convert 4 packed signed doubleword integers from xmm1 and 4 packed signed doubleword integers from xmm2/m128 into 8 packed unsigned word integers in xmm1 using unsigned saturation. + + + VPACKUSDW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 2B /r + + AVX + + Convert 4 packed signed doubleword integers from xmm2 and 4 packed signed doubleword integers from xmm3/m128 into 8 packed unsigned word integers in xmm1 using unsigned saturation. + + + VPACKUSDW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 2B /r + + AVX2 + + Convert 8 packed signed doubleword integers from ymm2 and 8 packed signed doubleword integers from ymm3/m256 into 16 packed unsigned word integers in ymm1 using unsigned saturation. + + + VPACKUSDW + xmm1{k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 2B /r + + AVX512VL + AVX512BW + + Convert packed signed doubleword integers from xmm2 and packed signed doubleword integers from xmm3/m128/m32bcst into packed unsigned word integers in xmm1 using unsigned saturation under writemask k1. + + + VPACKUSDW + ymm1{k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 2B /r + + AVX512VL + AVX512BW + + Convert packed signed doubleword integers from ymm2 and packed signed doubleword integers from ymm3/m256/m32bcst into packed unsigned word integers in ymm1 using unsigned saturation under writemask k1. + + + VPACKUSDW + zmm1{k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 2B /r + + AVX512BW + + Convert packed signed doubleword integers from zmm2 and packed signed doubleword integers from zmm3/m512/m32bcst into packed unsigned word integers in zmm1 using unsigned saturation under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PACKUSWB--Pack with Unsigned Saturation. + + PACKUSWB + xmm1,xmm2/m128 + 66 0F 67 /r + + SSE2 + + Converts 8 signed word integers from xmm1 and 8 signed word integers from xmm2/m128 into 16 unsigned byte integers in xmm1 using unsigned saturation. + + + VPACKUSWB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 67 /r + + AVX + + Converts 8 signed word integers from xmm2 and 8 signed word integers from xmm3/m128 into 16 unsigned byte integers in xmm1 using unsigned saturation. 
+ + + VPACKUSWB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 67 /r + + AVX2 + + Converts 16 signed word integers from ymm2 and 16 signed word integers from ymm3/m256 into 32 unsigned byte integers in ymm1 using unsigned saturation. + + + VPACKUSWB + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 67 /r + + AVX512VL + AVX512BW + + Converts signed word integers from xmm2 and signed word integers from xmm3/m128 into unsigned byte integers in xmm1 using unsigned saturation under writemask k1. + + + VPACKUSWB + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 67 /r + + AVX512VL + AVX512BW + + Converts signed word integers from ymm2 and signed word integers from ymm3/m256 into unsigned byte integers in ymm1 using unsigned saturation under writemask k1. + + + VPACKUSWB + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 67 /r + + AVX512BW + + Converts signed word integers from zmm2 and signed word integers from zmm3/m512 into unsigned byte integers in zmm1 using unsigned saturation under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PADDB/PADDW/PADDD/PADDQ--Add Packed Integers. + + PADDB + xmm1,xmm2/m128 + 66 0F FC /r + + SSE2 + + Add packed byte integers from xmm2/m128 and xmm1. + + + PADDW + xmm1,xmm2/m128 + 66 0F FD /r + + SSE2 + + Add packed word integers from xmm2/m128 and xmm1. + + + PADDD + xmm1,xmm2/m128 + 66 0F FE /r + + SSE2 + + Add packed doubleword integers from xmm2/m128 and xmm1. + + + PADDQ + xmm1,xmm2/m128 + 66 0F D4 /r + + SSE2 + + Add packed quadword integers from xmm2/m128 and xmm1. + + + VPADDB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FC /r + + AVX + + Add packed byte integers from xmm2, and xmm3/m128 and store in xmm1. + + + VPADDW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FD /r + + AVX + + Add packed word integers from xmm2, xmm3/m128 and store in xmm1. + + + VPADDD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FE /r + + AVX + + Add packed doubleword integers from xmm2, xmm3/m128 and store in xmm1. + + + VPADDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D4 /r + + AVX + + Add packed quadword integers from xmm2, xmm3/m128 and store in xmm1. + + + VPADDB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FC /r + + AVX2 + + Add packed byte integers from ymm2, and ymm3/m256 and store in ymm1. + + + VPADDW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FD /r + + AVX2 + + Add packed word integers from ymm2, ymm3/m256 and store in ymm1. + + + VPADDD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FE /r + + AVX2 + + Add packed doubleword integers from ymm2, ymm3/m256 and store in ymm1. + + + VPADDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG D4 /r + + AVX2 + + Add packed quadword integers from ymm2, ymm3/m256 and store in ymm1. + + + VPADDB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG FC /r + + AVX512VL + AVX512BW + + Add packed byte integers from xmm2, and xmm3/m128 and store in xmm1 using writemask k1. + + + VPADDW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG FD /r + + AVX512VL + AVX512BW + + Add packed word integers from xmm2, and xmm3/m128 and store in xmm1 using writemask k1. + + + VPADDD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 FE /r + + AVX512VL + AVX512F + + Add packed doubleword integers from xmm2, and xmm3/m128/m32bcst and store in xmm1 using writemask k1. 
+ + + VPADDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 D4 /r + + AVX512VL + AVX512F + + Add packed quadword integers from xmm2, and xmm3/m128/m64bcst and store in xmm1 using writemask k1. + + + VPADDB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG FC /r + + AVX512VL + AVX512BW + + Add packed byte integers from ymm2, and ymm3/m256 and store in ymm1 using writemask k1. + + + VPADDW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG FD /r + + AVX512VL + AVX512BW + + Add packed word integers from ymm2, and ymm3/m256 and store in ymm1 using writemask k1. + + + VPADDD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 FE /r + + AVX512VL + AVX512F + + Add packed doubleword integers from ymm2, ymm3/m256/m32bcst and store in ymm1 using writemask k1. + + + VPADDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 D4 /r + + AVX512VL + AVX512F + + Add packed quadword integers from ymm2, ymm3/m256/m64bcst and store in ymm1 using writemask k1. + + + VPADDB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG FC /r + + AVX512BW + + Add packed byte integers from zmm2, and zmm3/m512 and store in zmm1 using writemask k1. + + + VPADDW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG FD /r + + AVX512BW + + Add packed word integers from zmm2, and zmm3/m512 and store in zmm1 using writemask k1. + + + VPADDD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 FE /r + + AVX512F + + Add packed doubleword integers from zmm2, zmm3/m512/m32bcst and store in zmm1 using writemask k1. + + + VPADDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 D4 /r + + AVX512F + + Add packed quadword integers from zmm2, zmm3/m512/m64bcst and store in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PADDSB/PADDSW--Add Packed Signed Integers with Signed Saturation. + + PADDSB + xmm1,xmm2/m128 + 66 0F EC /r + + SSE2 + + Add packed signed byte integers from xmm2/m128 and xmm1 and saturate the results. + + + PADDSW + xmm1,xmm2/m128 + 66 0F ED /r + + SSE2 + + Add packed signed word integers from xmm2/m128 and xmm1 and saturate the results. + + + VPADDSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F EC + + AVX + + Add packed signed byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1. + + + VPADDSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F ED + + AVX + + Add packed signed word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1. + + + VPADDSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F EC + + AVX2 + + Add packed signed byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + VPADDSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F ED + + AVX2 + + Add packed signed word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + VPADDSB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG EC /r + + AVX512VL + AVX512BW + + Add packed signed byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1. + + + VPADDSB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG EC /r + + AVX512VL + AVX512BW + + Add packed signed byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1. 
+ + + VPADDSB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG EC /r + + AVX512BW + + Add packed signed byte integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1. + + + VPADDSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG ED /r + + AVX512VL + AVX512BW + + Add packed signed word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1. + + + VPADDSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG ED /r + + AVX512VL + AVX512BW + + Add packed signed word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1. + + + VPADDSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG ED /r + + AVX512BW + + Add packed signed word integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PADDUSB/PADDUSW--Add Packed Unsigned Integers with Unsigned Saturation. + + PADDUSB + xmm1,xmm2/m128 + 66 0F DC /r + + SSE2 + + Add packed unsigned byte integers from xmm2/m128 and xmm1 and saturate the results. + + + PADDUSW + xmm1,xmm2/m128 + 66 0F DD /r + + SSE2 + + Add packed unsigned word integers from xmm2/m128 and xmm1 and saturate the results. + + + VPADDUSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F DC + + AVX + + Add packed unsigned byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1. + + + VPADDUSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F DD + + AVX + + Add packed unsigned word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1. + + + VPADDUSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F DC + + AVX2 + + Add packed unsigned byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + VPADDUSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F DD + + AVX2 + + Add packed unsigned word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + VPADDUSB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG DC /r + + AVX512VL + AVX512BW + + Add packed unsigned byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1. + + + VPADDUSB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG DC /r + + AVX512VL + AVX512BW + + Add packed unsigned byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1. + + + VPADDUSB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG DC /r + + AVX512BW + + Add packed unsigned byte integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1. + + + VPADDUSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG DD /r + + AVX512VL + AVX512BW + + Add packed unsigned word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1. + + + VPADDUSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG DD /r + + AVX512VL + AVX512BW + + Add packed unsigned word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1. + + + VPADDUSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG DD /r + + AVX512BW + + Add packed unsigned word integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PALIGNR--Byte Align. + + PALIGNR + xmm1,xmm2/m128,imm8 + 66 0F 3A 0F /r ib + + SSSE3 + + Concatenate destination and source operands, extract byte aligned result shifted to the right by constant value in imm8 and result is stored in xmm1. + + + VPALIGNR + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A 0F /r ib + + AVX + + Concatenate xmm2 and xmm3/m128 into a 32-byte intermediate result, extract byte aligned result shifted to the right by constant value in imm8 and result is stored in xmm1. + + + VPALIGNR + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A 0F /r ib + + AVX2 + + Concatenate pairs of 16 bytes in ymm2 and ymm3/m256 into 32-byte intermediate result, extract byte-aligned, 16-byte result shifted to the right by constant values in imm8 from each intermediate result, and two 16-byte results are stored in ymm1. + + + VPALIGNR + xmm1 {k1}{z},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.WIG 0F /r ib + + AVX512VL + AVX512BW + + Concatenate xmm2 and xmm3/m128 into a 32-byte intermediate result, extract byte aligned result shifted to the right by constant value in imm8 and result is stored in xmm1. + + + VPALIGNR + ymm1 {k1}{z},ymm2,ymm3/m256 imm8 + EVEX.NDS.256.66.0F3A.WIG 0F /r ib + + AVX512VL + AVX512BW + + Concatenate pairs of 16 bytes in ymm2 and ymm3/m256 into 32-byte intermediate result, extract byte-aligned, 16-byte result shifted to the right by constant values in imm8 from each intermediate result, and two 16-byte results are stored in ymm1. + + + VPALIGNR + zmm1 {k1}{z},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.WIG 0F /r ib + + AVX512BW + + Concatenate pairs of 16 bytes in zmm2 and zmm3/m512 into 32-byte intermediate result, extract byte-aligned, 16-byte result shifted to the right by constant values in imm8 from each intermediate result, and four 16-byte results are stored in zmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PAND--Logical AND. + + PAND + xmm1,xmm2/m128 + 66 0F DB /r + + SSE2 + + Bitwise AND of xmm2/m128 and xmm1. + + + VPAND + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG DB /r + + AVX + + Bitwise AND of xmm2, and xmm3/m128 and store result in xmm1. + + + VPAND + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG DB /r + + AVX2 + + Bitwise AND of ymm2, and ymm3/m256 and store result in ymm1. + + + VPANDD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 DB /r + + AVX512VL + AVX512F + + Bitwise AND of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and store result in xmm1 using writemask k1. + + + VPANDD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 DB /r + + AVX512VL + AVX512F + + Bitwise AND of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and store result in ymm1 using writemask k1. + + + VPANDD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 DB /r + + AVX512F + + Bitwise AND of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and store result in zmm1 using writemask k1. + + + VPANDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 DB /r + + AVX512VL + AVX512F + + Bitwise AND of packed quadword integers in xmm2 and xmm3/m128/m64bcst and store result in xmm1 using writemask k1. 
+ + + VPANDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 DB /r + + AVX512VL + AVX512F + + Bitwise AND of packed quadword integers in ymm2 and ymm3/m256/m64bcst and store result in ymm1 using writemask k1. + + + VPANDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 DB /r + + AVX512F + + Bitwise AND of packed quadword integers in zmm2 and zmm3/m512/m64bcst and store result in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PANDN--Logical AND NOT. + + PANDN + xmm1,xmm2/m128 + 66 0F DF /r + + SSE2 + + Bitwise AND NOT of xmm2/m128 and xmm1. + + + VPANDN + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG DF /r + + AVX + + Bitwise AND NOT of xmm2, and xmm3/m128 and store result in xmm1. + + + VPANDN + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG DF /r + + AVX2 + + Bitwise AND NOT of ymm2, and ymm3/m256 and store result in ymm1. + + + VPANDND + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 DF /r + + AVX512VL + AVX512F + + Bitwise AND NOT of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and store result in xmm1 using writemask k1. + + + VPANDND + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 DF /r + + AVX512VL + AVX512F + + Bitwise AND NOT of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and store result in ymm1 using writemask k1. + + + VPANDND + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 DF /r + + AVX512F + + Bitwise AND NOT of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and store result in zmm1 using writemask k1. + + + VPANDNQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 DF /r + + AVX512VL + AVX512F + + Bitwise AND NOT of packed quadword integers in xmm2 and xmm3/m128/m64bcst and store result in xmm1 using writemask k1. + + + VPANDNQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 DF /r + + AVX512VL + AVX512F + + Bitwise AND NOT of packed quadword integers in ymm2 and ymm3/m256/m64bcst and store result in ymm1 using writemask k1. + + + VPANDNQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 DF /r + + AVX512F + + Bitwise AND NOT of packed quadword integers in zmm2 and zmm3/m512/m64bcst and store result in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PAVGB/PAVGW--Average Packed Integers. + + PAVGB + xmm1,xmm2/m128 + 66 0F E0,/r + + SSE2 + + Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding. + + + PAVGW + xmm1,xmm2/m128 + 66 0F E3,/r + + SSE2 + + Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding. + + + VPAVGB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E0 + + AVX + + Average packed unsigned byte integers from xmm2, and xmm3/m128 with rounding and store to xmm1. + + + VPAVGW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E3 + + AVX + + Average packed unsigned word integers from xmm2, xmm3/m128 with rounding to xmm1. + + + VPAVGB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E0 + + AVX2 + + Average packed unsigned byte integers from ymm2, and ymm3/m256 with rounding and store to ymm1. + + + VPAVGW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E3 + + AVX2 + + Average packed unsigned word integers from ymm2, ymm3/m256 with rounding to ymm1. 
+ + + VPAVGB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E0 /r + + AVX512VL + AVX512BW + + Average packed unsigned byte integers from xmm2, and xmm3/m128 with rounding and store to xmm1 under writemask k1. + + + VPAVGB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E0 /r + + AVX512VL + AVX512BW + + Average packed unsigned byte integers from ymm2, and ymm3/m256 with rounding and store to ymm1 under writemask k1. + + + VPAVGB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E0 /r + + AVX512BW + + Average packed unsigned byte integers from zmm2, and zmm3/m512 with rounding and store to zmm1 under writemask k1. + + + VPAVGW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E3 /r + + AVX512VL + AVX512BW + + Average packed unsigned word integers from xmm2, xmm3/m128 with rounding to xmm1 under writemask k1. + + + VPAVGW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E3 /r + + AVX512VL + AVX512BW + + Average packed unsigned word integers from ymm2, ymm3/m256 with rounding to ymm1 under writemask k1. + + + VPAVGW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E3 /r + + AVX512BW + + Average packed unsigned word integers from zmm2, zmm3/m512 with rounding to zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPBROADCASTM--Broadcast Mask to Vector Register. + + VPBROADCASTMB2Q + xmm1,k1 + EVEX.128.F3.0F38.W1 2A /r + + AVX512VL + AVX512CD + + Broadcast low byte value in k1 to two locations in xmm1. + + + VPBROADCASTMB2Q + ymm1,k1 + EVEX.256.F3.0F38.W1 2A /r + + AVX512VL + AVX512CD + + Broadcast low byte value in k1 to four locations in ymm1. + + + VPBROADCASTMB2Q + zmm1,k1 + EVEX.512.F3.0F38.W1 2A /r + + AVX512CD + + Broadcast low byte value in k1 to eight locations in zmm1. + + + VPBROADCASTMW2D + xmm1,k1 + EVEX.128.F3.0F38.W0 3A /r + + AVX512VL + AVX512CD + + Broadcast low word value in k1 to four locations in xmm1. + + + VPBROADCASTMW2D + ymm1,k1 + EVEX.256.F3.0F38.W0 3A /r + + AVX512VL + AVX512CD + + Broadcast low word value in k1 to eight locations in ymm1. + + + VPBROADCASTMW2D + zmm1,k1 + EVEX.512.F3.0F38.W0 3A /r + + AVX512CD + + Broadcast low word value in k1 to sixteen locations in zmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PCMPEQB/PCMPEQW/PCMPEQD/PCMPEQQ--Compare Packed Integers for Equality. + + PCMPEQB + xmm1,xmm2/m128 + 66 0F 74 /r + + SSE2 + + Compare packed bytes in xmm2/m128 and xmm1 for equality. + + + PCMPEQW + xmm1,xmm2/m128 + 66 0F 75 /r + + SSE2 + + Compare packed words in xmm2/m128 and xmm1 for equality. + + + PCMPEQD + xmm1,xmm2/m128 + 66 0F 76 /r + + SSE2 + + Compare packed doublewords in xmm2/m128 and xmm1 for equality. + + + PCMPEQQ + xmm1,xmm2/m128 + 66 0F 38 29 /r + + SSE4_1 + + Compare packed quadwords in xmm2/m128 and xmm1 for equality. + + + VPCMPEQB + xmm1,xmm2,xmm3 /m128 + VEX.NDS.128.66.0F.WIG 74 /r + + AVX + + Compare packed bytes in xmm3/m128 and xmm2 for equality. + + + VPCMPEQW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 75 /r + + AVX + + Compare packed words in xmm3/m128 and xmm2 for equality. + + + VPCMPEQD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 76 /r + + AVX + + Compare packed doublewords in xmm3/m128 and xmm2 for equality. + + + VPCMPEQQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 29 /r + + AVX + + Compare packed quadwords in xmm3/m128 and xmm2 for equality. 
+ + + VPCMPEQB + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F.WIG 74 /r + + AVX2 + + Compare packed bytes in ymm3/m256 and ymm2 for equality. + + + VPCMPEQW + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F.WIG 75 /r + + AVX2 + + Compare packed words in ymm3/m256 and ymm2 for equality. + + + VPCMPEQD + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F.WIG 76 /r + + AVX2 + + Compare packed doublewords in ymm3/m256 and ymm2 for equality. + + + VPCMPEQQ + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F38.WIG 29 /r + + AVX2 + + Compare packed quadwords in ymm3/m256 and ymm2 for equality. + + + VPCMPEQD + k1 {k2},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 76 /r + + AVX512VL + AVX512F + + Compare Equal between int32 vector xmm2 and int32 vector xmm3/m128/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQD + k1 {k2},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 76 /r + + AVX512VL + AVX512F + + Compare Equal between int32 vector ymm2 and int32 vector ymm3/m256/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQD + k1 {k2},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 76 /r + + AVX512F + + Compare Equal between int32 vectors in zmm2 and zmm3/m512/m32bcst, and set destination k1 according to the comparison results under writemask k2,. + + + VPCMPEQQ + k1 {k2},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 29 /r + + AVX512VL + AVX512F + + Compare Equal between int64 vector xmm2 and int64 vector xmm3/m128/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQQ + k1 {k2},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 29 /r + + AVX512VL + AVX512F + + Compare Equal between int64 vector ymm2 and int64 vector ymm3/m256/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQQ + k1 {k2},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 29 /r + + AVX512F + + Compare Equal between int64 vector zmm2 and int64 vector zmm3/m512/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQB + k1 {k2},xmm2,xmm3 /m128 + EVEX.NDS.128.66.0F.WIG 74 /r + + AVX512VL + AVX512BW + + Compare packed bytes in xmm3/m128 and xmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQB + k1 {k2},ymm2,ymm3 /m256 + EVEX.NDS.256.66.0F.WIG 74 /r + + AVX512VL + AVX512BW + + Compare packed bytes in ymm3/m256 and ymm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQB + k1 {k2},zmm2,zmm3 /m512 + EVEX.NDS.512.66.0F.WIG 74 /r + + AVX512BW + + Compare packed bytes in zmm3/m512 and zmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQW + k1 {k2},xmm2,xmm3 /m128 + EVEX.NDS.128.66.0F.WIG 75 /r + + AVX512VL + AVX512BW + + Compare packed words in xmm3/m128 and xmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQW + k1 {k2},ymm2,ymm3 /m256 + EVEX.NDS.256.66.0F.WIG 75 /r + + AVX512VL + AVX512BW + + Compare packed words in ymm3/m256 and ymm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. 
+ + + VPCMPEQW + k1 {k2},zmm2,zmm3 /m512 + EVEX.NDS.512.66.0F.WIG 75 /r + + AVX512BW + + Compare packed words in zmm3/m512 and zmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PCMPGTB/PCMPGTW/PCMPGTD/PCMPGTQ--Compare Packed Integers for Greater Than. + + PCMPGTB + xmm1,xmm2/m128 + 66 0F 64 /r + + SSE2 + + Compare packed signed byte integers in xmm1 and xmm2/m128 for greater than. + + + PCMPGTW + xmm1,xmm2/m128 + 66 0F 65 /r + + SSE2 + + Compare packed signed word integers in xmm1 and xmm2/m128 for greater than. + + + PCMPGTD + xmm1,xmm2/m128 + 66 0F 66 /r + + SSE2 + + Compare packed signed doubleword integers in xmm1 and xmm2/m128 for greater than. + + + PCMPGTQ + xmm1,xmm2/m128 + 66 0F 38 37 /r + + SSE4_2 + + Compare packed qwords in xmm2/m128 and xmm1 for greater than. + + + VPCMPGTB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 64 /r + + AVX + + Compare packed signed byte integers in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 65 /r + + AVX + + Compare packed signed word integers in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 66 /r + + AVX + + Compare packed signed doubleword integers in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 37 /r + + AVX + + Compare packed signed qwords in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 64 /r + + AVX2 + + Compare packed signed byte integers in ymm2 and ymm3/m256 for greater than. + + + VPCMPGTW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 65 /r + + AVX2 + + Compare packed signed word integers in ymm2 and ymm3/m256 for greater than. + + + VPCMPGTD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 66 /r + + AVX2 + + Compare packed signed doubleword integers in ymm2 and ymm3/m256 for greater than. + + + VPCMPGTQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 37 /r + + AVX2 + + Compare packed signed qwords in ymm2 and ymm3/m256 for greater than. + + + VPCMPGTD + k1 {k2},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 66 /r + + AVX512VL + AVX512F + + Compare Greater between int32 vector xmm2 and int32 vector xmm3/m128/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTD + k1 {k2},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 66 /r + + AVX512VL + AVX512F + + Compare Greater between int32 vector ymm2 and int32 vector ymm3/m256/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTD + k1 {k2},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 66 /r + + AVX512F + + Compare Greater between int32 elements in zmm2 and zmm3/m512/m32bcst, and set destination k1 according to the comparison results under writemask. k2. + + + VPCMPGTQ + k1 {k2},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 37 /r + + AVX512VL + AVX512F + + Compare Greater between int64 vector xmm2 and int64 vector xmm3/m128/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. 
+ + + VPCMPGTQ + k1 {k2},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 37 /r + + AVX512VL + AVX512F + + Compare Greater between int64 vector ymm2 and int64 vector ymm3/m256/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTQ + k1 {k2},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 37 /r + + AVX512F + + Compare Greater between int64 vector zmm2 and int64 vector zmm3/m512/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTB + k1 {k2},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 64 /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in xmm2 and xmm3/m128 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTB + k1 {k2},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 64 /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in ymm2 and ymm3/m256 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTB + k1 {k2},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 64 /r + + AVX512BW + + Compare packed signed byte integers in zmm2 and zmm3/m512 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTW + k1 {k2},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 65 /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in xmm2 and xmm3/m128 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTW + k1 {k2},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 65 /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in ymm2 and ymm3/m256 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTW + k1 {k2},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 65 /r + + AVX512BW + + Compare packed signed word integers in zmm2 and zmm3/m512 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPCMPB/VPCMPUB--Compare Packed Byte Values Into Mask. + + VPCMPB + k1 {k2},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.W0 3F /r ib + + AVX512VL + AVX512BW + + Compare packed signed byte values in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPB + k1 {k2},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.W0 3F /r ib + + AVX512VL + AVX512BW + + Compare packed signed byte values in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPB + k1 {k2},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.W0 3F /r ib + + AVX512BW + + Compare packed signed byte values in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. 
+ + + VPCMPUB + k1 {k2},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.W0 3E /r ib + + AVX512VL + AVX512BW + + Compare packed unsigned byte values in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUB + k1 {k2},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.W0 3E /r ib + + AVX512VL + AVX512BW + + Compare packed unsigned byte values in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUB + k1 {k2},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.W0 3E /r ib + + AVX512BW + + Compare packed unsigned byte values in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(w) + vvvv(r) + ModRM:r/m(r) + NA + + + + VPCMPD/VPCMPUD--Compare Packed Integer Values into Mask. + + VPCMPD + k1 {k2},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.66.0F3A.W0 1F /r ib + + AVX512VL + AVX512F + + Compare packed signed doubleword integer values in xmm3/m128/m32bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPD + k1 {k2},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 1F /r ib + + AVX512VL + AVX512F + + Compare packed signed doubleword integer values in ymm3/m256/m32bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPD + k1 {k2},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.66.0F3A.W0 1F /r ib + + AVX512F + + Compare packed signed doubleword integer values in zmm2 and zmm3/m512/m32bcst using bits 2:0 of imm8 as a comparison predicate. The comparison results are written to the destination k1 under writemask k2. + + + VPCMPUD + k1 {k2},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.66.0F3A.W0 1E /r ib + + AVX512VL + AVX512F + + Compare packed unsigned doubleword integer values in xmm3/m128/m32bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUD + k1 {k2},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 1E /r ib + + AVX512VL + AVX512F + + Compare packed unsigned doubleword integer values in ymm3/m256/m32bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUD + k1 {k2},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.66.0F3A.W0 1E /r ib + + AVX512F + + Compare packed unsigned doubleword integer values in zmm2 and zmm3/m512/m32bcst using bits 2:0 of imm8 as a comparison predicate. The comparison results are written to the destination k1 under writemask k2. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VPCMPQ/VPCMPUQ--Compare Packed Integer Values into Mask. + + VPCMPQ + k1 {k2},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F3A.W1 1F /r ib + + AVX512VL + AVX512F + + Compare packed signed quadword integer values in xmm3/m128/m64bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPQ + k1 {k2},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 1F /r ib + + AVX512VL + AVX512F + + Compare packed signed quadword integer values in ymm3/m256/m64bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. 
+ + + VPCMPQ + k1 {k2},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F3A.W1 1F /r ib + + AVX512F + + Compare packed signed quadword integer values in zmm3/m512/m64bcst and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUQ + k1 {k2},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F3A.W1 1E /r ib + + AVX512VL + AVX512F + + Compare packed unsigned quadword integer values in xmm3/m128/m64bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUQ + k1 {k2},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 1E /r ib + + AVX512VL + AVX512F + + Compare packed unsigned quadword integer values in ymm3/m256/m64bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUQ + k1 {k2},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F3A.W1 1E /r ib + + AVX512F + + Compare packed unsigned quadword integer values in zmm3/m512/m64bcst and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VPCMPW/VPCMPUW--Compare Packed Word Values Into Mask. + + VPCMPW + k1 {k2},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.W1 3F /r ib + + AVX512VL + AVX512BW + + Compare packed signed word integers in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPW + k1 {k2},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.W1 3F /r ib + + AVX512VL + AVX512BW + + Compare packed signed word integers in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPW + k1 {k2},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.W1 3F /r ib + + AVX512BW + + Compare packed signed word integers in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUW + k1 {k2},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.W1 3E /r ib + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUW + k1 {k2},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.W1 3E /r ib + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUW + k1 {k2},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.W1 3E /r ib + + AVX512BW + + Compare packed unsigned word integers in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(w) + vvvv(r) + ModRM:r/m(r) + NA + + + + VPCOMPRESSD--Store Sparse Packed Doubleword Integer Values into Dense Memory/Register. + + VPCOMPRESSD + xmm1/m128 {k1}{z},xmm2 + EVEX.128.66.0F38.W0 8B /r + + AVX512VL + AVX512F + + Compress packed doubleword integer values from xmm2 to xmm1/m128 using controlmask k1. + + + VPCOMPRESSD + ymm1/m256 {k1}{z},ymm2 + EVEX.256.66.0F38.W0 8B /r + + AVX512VL + AVX512F + + Compress packed doubleword integer values from ymm2 to ymm1/m256 using controlmask k1. 
+ + + VPCOMPRESSD + zmm1/m512 {k1}{z},zmm2 + EVEX.512.66.0F38.W0 8B /r + + AVX512F + + Compress packed doubleword integer values from zmm2 to zmm1/m512 using controlmask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPCOMPRESSQ--Store Sparse Packed Quadword Integer Values into Dense Memory/Register. + + VPCOMPRESSQ + xmm1/m128 {k1}{z},xmm2 + EVEX.128.66.0F38.W1 8B /r + + AVX512VL + AVX512F + + Compress packed quadword integer values from xmm2 to xmm1/m128 using controlmask k1. + + + VPCOMPRESSQ + ymm1/m256 {k1}{z},ymm2 + EVEX.256.66.0F38.W1 8B /r + + AVX512VL + AVX512F + + Compress packed quadword integer values from ymm2 to ymm1/m256 using controlmask k1. + + + VPCOMPRESSQ + zmm1/m512 {k1}{z},zmm2 + EVEX.512.66.0F38.W1 8B /r + + AVX512F + + Compress packed quadword integer values from zmm2 to zmm1/m512 using controlmask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPCONFLICTD/Q--Detect Conflicts Within a Vector of Packed Dword/Qword Values into Dense Memory/ Register. + + VPCONFLICTD + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 C4 /r + + AVX512VL + AVX512CD + + Detect duplicate double-word values in xmm2/m128/m32bcst using writemask k1. + + + VPCONFLICTD + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 C4 /r + + AVX512VL + AVX512CD + + Detect duplicate double-word values in ymm2/m256/m32bcst using writemask k1. + + + VPCONFLICTD + zmm1 {k1}{z},zmm2/m512/m32bcst + EVEX.512.66.0F38.W0 C4 /r + + AVX512CD + + Detect duplicate double-word values in zmm2/m512/m32bcst using writemask k1. + + + VPCONFLICTQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 C4 /r + + AVX512VL + AVX512CD + + Detect duplicate quad-word values in xmm2/m128/m64bcst using writemask k1. + + + VPCONFLICTQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 C4 /r + + AVX512VL + AVX512CD + + Detect duplicate quad-word values in ymm2/m256/m64bcst using writemask k1. + + + VPCONFLICTQ + zmm1 {k1}{z},zmm2/m512/m64bcst + EVEX.512.66.0F38.W1 C4 /r + + AVX512CD + + Detect duplicate quad-word values in zmm2/m512/m64bcst using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPERMB--Permute Packed Bytes Elements. + + VPERMB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.128.NDS.66.0F38.W0 8D /r + + AVX512VL + AVX512VBMI + + Permute bytes in xmm3/m128 using byte indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.256.NDS.66.0F38.W0 8D /r + + AVX512VL + AVX512VBMI + + Permute bytes in ymm3/m256 using byte indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.512.NDS.66.0F38.W0 8D /r + + AVX512VBMI + + Permute bytes in zmm3/m512 using byte indexes in zmm2 and store the result in zmm1 using writemask k1. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VPERMD/VPERMW--Permute Packed Doublewords/Words Elements. + + VPERMD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 36 /r + + AVX2 + + Permute doublewords in ymm3/m256 using indices in ymm2 and store the result in ymm1. + + + VPERMD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 36 /r + + AVX512VL + AVX512F + + Permute doublewords in ymm3/m256/m32bcst using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 36 /r + + AVX512F + + Permute doublewords in zmm3/m512/m32bcst using indices in zmm2 and store the result in zmm1 using writemask k1. 
+ + + VPERMW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 8D /r + + AVX512VL + AVX512BW + + Permute word integers in xmm3/m128 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 8D /r + + AVX512VL + AVX512BW + + Permute word integers in ymm3/m256 using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 8D /r + + AVX512BW + + Permute word integers in zmm3/m512 using indexes in zmm2 and store the result in zmm1 using writemask k1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VPERMI2B--Full Permute of Bytes From Two Tables Overwriting the Index. + + VPERMI2B + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.DDS.128.66.0F38.W0 75 /r + + AVX512VL + AVX512VBMI + + Permute bytes in xmm3/m128 and xmm2 using byte indexes in xmm1 and store the byte results in xmm1 using writemask k1. + + + VPERMI2B + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.DDS.256.66.0F38.W0 75 /r + + AVX512VL + AVX512VBMI + + Permute bytes in ymm3/m256 and ymm2 using byte indexes in ymm1 and store the byte results in ymm1 using writemask k1. + + + VPERMI2B + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.DDS.512.66.0F38.W0 75 /r + + AVX512VBMI + + Permute bytes in zmm3/m512 and zmm2 using byte indexes in zmm1 and store the byte results in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VPERMI2W/D/Q/PS/PD--Full Permute From Two Tables Overwriting the Index. + + VPERMI2W + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.DDS.128.66.0F38.W1 75 /r + + AVX512VL + AVX512BW + + Permute word integers from two tables in xmm3/m128 and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1. + + + VPERMI2W + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.DDS.256.66.0F38.W1 75 /r + + AVX512VL + AVX512BW + + Permute word integers from two tables in ymm3/m256 and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1. + + + VPERMI2W + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.DDS.512.66.0F38.W1 75 /r + + AVX512BW + + Permute word integers from two tables in zmm3/m512 and zmm2 using indexes in zmm1 and store the result in zmm1 using writemask k1. + + + VPERMI2D + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 76 /r + + AVX512VL + AVX512F + + Permute double-words from two tables in xmm3/m128/m32bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1. + + + VPERMI2D + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 76 /r + + AVX512VL + AVX512F + + Permute double-words from two tables in ymm3/m256/m32bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1. + + + VPERMI2D + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.DDS.512.66.0F38.W0 76 /r + + AVX512F + + Permute double-words from two tables in zmm3/m512/m32bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1. + + + VPERMI2Q + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 76 /r + + AVX512VL + AVX512F + + Permute quad-words from two tables in xmm3/m128/m64bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1. 
+ + + VPERMI2Q + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 76 /r + + AVX512VL + AVX512F + + Permute quad-words from two tables in ymm3/m256/m64bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1. + + + VPERMI2Q + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 76 /r + + AVX512F + + Permute quad-words from two tables in zmm3/m512/m64bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1. + + + VPERMI2PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 77 /r + + AVX512VL + AVX512F + + Permute single-precision FP values from two tables in xmm3/m128/m32bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1. + + + VPERMI2PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 77 /r + + AVX512VL + AVX512F + + Permute single-precision FP values from two tables in ymm3/m256/m32bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1. + + + VPERMI2PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.DDS.512.66.0F38.W0 77 /r + + AVX512F + + Permute single-precision FP values from two tables in zmm3/m512/m32bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1. + + + VPERMI2PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 77 /r + + AVX512VL + AVX512F + + Permute double-precision FP values from two tables in xmm3/m128/m64bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1. + + + VPERMI2PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 77 /r + + AVX512VL + AVX512F + + Permute double-precision FP values from two tables in ymm3/m256/m64bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1. + + + VPERMI2PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 77 /r + + AVX512F + + Permute double-precision FP values from two tables in zmm3/m512/m64bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPERMT2B--Full Permute of Bytes From Two Tables Overwriting a Table. + + VPERMT2B + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.DDS.128.66.0F38.W0 7D /r + + AVX512VL + AVX512VBMI + + Permute bytes in xmm3/m128 and xmm1 using byte indexes in xmm2 and store the byte results in xmm1 using writemask k1. + + + VPERMT2B + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W0 7D /r + + AVX512VL + AVX512VBMI + + Permute bytes in ymm3/m256 and ymm1 using byte indexes in ymm2 and store the byte results in ymm1 using writemask k1. + + + VPERMT2B + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W0 7D /r + + AVX512VBMI + + Permute bytes in zmm3/m512 and zmm1 using byte indexes in zmm2 and store the byte results in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VPERMT2W/D/Q/PS/PD--Full Permute from Two Tables Overwriting one Table. + + VPERMT2W + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.DDS.128.66.0F38.W1 7D /r + + AVX512VL + AVX512BW + + Permute word integers from two tables in xmm3/m128 and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMT2W + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.DDS.256.66.0F38.W1 7D /r + + AVX512VL + AVX512BW + + Permute word integers from two tables in ymm3/m256 and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1. 
+ + + VPERMT2W + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.DDS.512.66.0F38.W1 7D /r + + AVX512BW + + Permute word integers from two tables in zmm3/m512 and zmm1 using indexes in zmm2 and store the result in zmm1 using writemask k1. + + + VPERMT2D + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 7E /r + + AVX512VL + AVX512F + + Permute double-words from two tables in xmm3/m128/m32bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMT2D + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 7E /r + + AVX512VL + AVX512F + + Permute double-words from two tables in ymm3/m256/m32bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMT2D + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.DDS.512.66.0F38.W0 7E /r + + AVX512F + + Permute double-words from two tables in zmm3/m512/m32bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1. + + + VPERMT2Q + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 7E /r + + AVX512VL + AVX512F + + Permute quad-words from two tables in xmm3/m128/m64bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMT2Q + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 7E /r + + AVX512VL + AVX512F + + Permute quad-words from two tables in ymm3/m256/m64bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMT2Q + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 7E /r + + AVX512F + + Permute quad-words from two tables in zmm3/m512/m64bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1. + + + VPERMT2PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 7F /r + + AVX512VL + AVX512F + + Permute single-precision FP values from two tables in xmm3/m128/m32bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMT2PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 7F /r + + AVX512VL + AVX512F + + Permute single-precision FP values from two tables in ymm3/m256/m32bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMT2PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.DDS.512.66.0F38.W0 7F /r + + AVX512F + + Permute single-precision FP values from two tables in zmm3/m512/m32bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1. + + + VPERMT2PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 7F /r + + AVX512VL + AVX512F + + Permute double-precision FP values from two tables in xmm3/m128/m64bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMT2PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 7F /r + + AVX512VL + AVX512F + + Permute double-precision FP values from two tables in ymm3/m256/m64bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMT2PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 7F /r + + AVX512F + + Permute double-precision FP values from two tables in zmm3/m512/m64bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPERMILPD--Permute In-Lane of Pairs of Double-Precision Floating-Point Values. 
+ + VPERMILPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 0D /r + + AVX + + Permute double-precision floating-point values in xmm2 using controls from xmm3/m128 and store result in xmm1. + + + VPERMILPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 0D /r + + AVX + + Permute double-precision floating-point values in ymm2 using controls from ymm3/m256 and store result in ymm1. + + + VPERMILPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 0D /r + + AVX512VL + AVX512F + + Permute double-precision floating-point values in xmm2 using control from xmm3/m128/m64bcst and store the result in xmm1 using writemask k1. + + + VPERMILPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 0D /r + + AVX512VL + AVX512F + + Permute double-precision floating-point values in ymm2 using control from ymm3/m256/m64bcst and store the result in ymm1 using writemask k1. + + + VPERMILPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 0D /r + + AVX512F + + Permute double-precision floating-point values in zmm2 using control from zmm3/m512/m64bcst and store the result in zmm1 using writemask k1. + + + VPERMILPD + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.W0 05 /r ib + + AVX + + Permute double-precision floating-point values in xmm2/m128 using controls from imm8. + + + VPERMILPD + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W0 05 /r ib + + AVX + + Permute double-precision floating-point values in ymm2/m256 using controls from imm8. + + + VPERMILPD + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.128.66.0F3A.W1 05 /r ib + + AVX512VL + AVX512F + + Permute double-precision floating-point values in xmm2/m128/m64bcst using controls from imm8 and store the result in xmm1 using writemask k1. + + + VPERMILPD + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 05 /r ib + + AVX512VL + AVX512F + + Permute double-precision floating-point values in ymm2/m256/m64bcst using controls from imm8 and store the result in ymm1 using writemask k1. + + + VPERMILPD + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.512.66.0F3A.W1 05 /r ib + + AVX512F + + Permute double-precision floating-point values in zmm2/m512/m64bcst using controls from imm8 and store the result in zmm1 using writemask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPERMILPS--Permute In-Lane of Quadruples of Single-Precision Floating-Point Values. + + VPERMILPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 0C /r + + AVX + + Permute single-precision floating-point values in xmm2 using controls from xmm3/m128 and store result in xmm1. + + + VPERMILPS + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.W0 04 /r ib + + AVX + + Permute single-precision floating-point values in xmm2/m128 using controls from imm8 and store result in xmm1. + + + VPERMILPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 0C /r + + AVX + + Permute single-precision floating-point values in ymm2 using controls from ymm3/m256 and store result in ymm1. + + + VPERMILPS + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W0 04 /r ib + + AVX + + Permute single-precision floating-point values in ymm2/m256 using controls from imm8 and store result in ymm1. + + + VPERMILPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 0C /r + + AVX512VL + AVX512F + + Permute single-precision floating-point values xmm2 using control from xmm3/m128/m32bcst and store the result in xmm1 using writemask k1. 
+ + + VPERMILPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 0C /r + + AVX512VL + AVX512F + + Permute single-precision floating-point values ymm2 using control from ymm3/m256/m32bcst and store the result in ymm1 using writemask k1. + + + VPERMILPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 0C /r + + AVX512F + + Permute single-precision floating-point values zmm2 using control from zmm3/m512/m32bcst and store the result in zmm1 using writemask k1. + + + VPERMILPS + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F3A.W0 04 /r ib + + AVX512VL + AVX512F + + Permute single-precision floating-point values xmm2/m128/m32bcst using controls from imm8 and store the result in xmm1 using writemask k1. + + + VPERMILPS + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F3A.W0 04 /r ib + + AVX512VL + AVX512F + + Permute single-precision floating-point values ymm2/m256/m32bcst using controls from imm8 and store the result in ymm1 using writemask k1. + + + VPERMILPS + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.512.66.0F3A.W0 04 /r ib + + AVX512F + + Permute single-precision floating-point values zmm2/m512/m32bcst using controls from imm8 and store the result in zmm1 using writemask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPERMPD--Permute Double-Precision Floating-Point Elements. + + VPERMPD + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W1 01 /r ib + + AVX2 + + Permute double-precision floating-point elements in ymm2/m256 using indices in imm8 and store the result in ymm1. + + + VPERMPD + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 01 /r ib + + AVX512VL + AVX512F + + Permute double-precision floating-point elements in ymm2/m256/m64bcst using indexes in imm8 and store the result in ymm1 subject to writemask k1. + + + VPERMPD + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.512.66.0F3A.W1 01 /r ib + + AVX512F + + Permute double-precision floating-point elements in zmm2/m512/m64bcst using indices in imm8 and store the result in zmm1 subject to writemask k1. + + + VPERMPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 16 /r + + AVX512VL + AVX512F + + Permute double-precision floating-point elements in ymm3/m256/m64bcst using indexes in ymm2 and store the result in ymm1 subject to writemask k1. + + + VPERMPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 16 /r + + AVX512F + + Permute double-precision floating-point elements in zmm3/m512/m64bcst using indices in zmm2 and store the result in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPERMPS--Permute Single-Precision Floating-Point Elements. + + VPERMPS + ymm1,ymm2,ymm3/m256 + VEX.256.66.0F38.W0 16 /r + + AVX2 + + Permute single-precision floating-point elements in ymm3/m256 using indices in ymm2 and store the result in ymm1. + + + VPERMPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 16 /r + + AVX512VL + AVX512F + + Permute single-precision floating-point elements in ymm3/m256/m32bcst using indexes in ymm2 and store the result in ymm1 subject to write mask k1. 
+ + + VPERMPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 16 /r + + AVX512F + + Permute single-precision floating-point values in zmm3/m512/m32bcst using indices in zmm2 and store the result in zmm1 subject to write mask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPERMQ--Qwords Element Permutation. + + VPERMQ + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W1 00 /r ib + + AVX2 + + Permute qwords in ymm2/m256 using indices in imm8 and store the result in ymm1. + + + VPERMQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 00 /r ib + + AVX512VL + AVX512F + + Permute qwords in ymm2/m256/m64bcst using indexes in imm8 and store the result in ymm1. + + + VPERMQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.512.66.0F3A.W1 00 /r ib + + AVX512F + + Permute qwords in zmm2/m512/m64bcst using indices in imm8 and store the result in zmm1. + + + VPERMQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 36 /r + + AVX512VL + AVX512F + + Permute qwords in ymm3/m256/m64bcst using indexes in ymm2 and store the result in ymm1. + + + VPERMQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 36 /r + + AVX512F + + Permute qwords in zmm3/m512/m64bcst using indices in zmm2 and store the result in zmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPEXPANDD--Load Sparse Packed Doubleword Integer Values from Dense Memory / Register. + + VPEXPANDD + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F38.W0 89 /r + + AVX512VL + AVX512F + + Expand packed double-word integer values from xmm2/m128 to xmm1 using writemask k1. + + + VPEXPANDD + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F38.W0 89 /r + + AVX512VL + AVX512F + + Expand packed double-word integer values from ymm2/m256 to ymm1 using writemask k1. + + + VPEXPANDD + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F38.W0 89 /r + + AVX512F + + Expand packed double-word integer values from zmm2/m512 to zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPEXPANDQ--Load Sparse Packed Quadword Integer Values from Dense Memory / Register. + + VPEXPANDQ + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F38.W1 89 /r + + AVX512VL + AVX512F + + Expand packed quad-word integer values from xmm2/m128 to xmm1 using writemask k1. + + + VPEXPANDQ + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F38.W1 89 /r + + AVX512VL + AVX512F + + Expand packed quad-word integer values from ymm2/m256 to ymm1 using writemask k1. + + + VPEXPANDQ + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F38.W1 89 /r + + AVX512F + + Expand packed quad-word integer values from zmm2/m512 to zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PEXTRB/PEXTRW/PEXTRD/PEXTRQ--Extract Integer. + + PEXTRB + reg/m8,xmm2,imm8 + 66 0F 3A 14 /r ib + + SSE4_1 + + Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r64/r32 is filled with zeros. + + + PEXTRW + reg,xmm1,imm8 + 66 0F C5 /r ib + + SSE2 + + Extract the word specified by imm8 from xmm1 and move it to reg, bits 15:0. The upper bits of r64/r32 is filled with zeros. + + + PEXTRW + reg/m16,xmm2,imm8 + 66 0F 3A 15 /r ib + + SSE4_1 + + Extract a word integer value from xmm2 at the source word offset specified by imm8 into reg or m16. The upper bits of r64/r32 is filled with zeros. 
+ + + PEXTRD + r32/m32,xmm2,imm8 + 66 0F 3A 16 /r ib + + SSE4_1 + + Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r32/m32. + + + PEXTRQ + r64/m64,xmm2,imm8 + 66 REX.W 0F 3A 16 /r ib + + SSE4_1 + + Extract a qword integer value from xmm2 at the source dword offset specified by imm8 into r64/m64. + + + VPEXTRB + reg/m8,xmm2,imm8 + VEX.128.66.0F3A 14 /r ib + + AVX + + Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRW + reg,xmm1,imm8 + VEX.128.66.0F C5 /r ib + + AVX + + Extract the word specified by imm8 from xmm1 and move it to reg, bits 15:0. Zero-extend the result. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRW + reg/m16,xmm2,imm8 + VEX.128.66.0F3A 15 /r ib + + AVX + + Extract a word integer value from xmm2 at the source word offset specified by imm8 into reg or m16. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRD + r32/m32,xmm2,imm8 + VEX.128.66.0F3A.W0 16 /r ib + + AVX + + Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r32/m32. + + + VPEXTRQ + r64/m64,xmm2,imm8 + VEX.128.66.0F3A.W1 16 /r ib + + AVX + + Extract a qword integer value from xmm2 at the source dword offset specified by imm8 into r64/m64. + + + VPEXTRB + reg/m8,xmm2,imm8 + EVEX.128.66.0F3A.WIG 14 /r ib + + AVX512BW + + Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRW + reg,xmm1,imm8 + EVEX.128.66.0F.WIG C5 /r ib + + AVX512BW + + Extract the word specified by imm8 from xmm1 and move it to reg, bits 15:0. Zero-extend the result. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRW + reg/m16,xmm2,imm8 + EVEX.128.66.0F3A.WIG 15 /r ib + + AVX512BW + + Extract a word integer value from xmm2 at the source word offset specified by imm8 into reg or m16. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRD + r32/m32,xmm2,imm8 + EVEX.128.66.0F3A.W0 16 /r ib + + AVX512DQ + + Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r32/m32. + + + VPEXTRQ + r64/m64,xmm2,imm8 + EVEX.128.66.0F3A.W1 16 /r ib + + AVX512DQ + + Extract a qword integer value from xmm2 at the source dword offset specified by imm8 into r64/m64. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + VPLZCNTD/Q--Count the Number of Leading Zero Bits for Packed Dword, Packed Qword Values. + + VPLZCNTD + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 44 /r + + AVX512VL + AVX512CD + + Count the number of leading zero bits in each dword element of xmm2/m128/m32bcst using writemask k1. + + + VPLZCNTD + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 44 /r + + AVX512VL + AVX512CD + + Count the number of leading zero bits in each dword element of ymm2/m256/m32bcst using writemask k1. + + + VPLZCNTD + zmm1 {k1}{z},zmm2/m512/m32bcst + EVEX.512.66.0F38.W0 44 /r + + AVX512CD + + Count the number of leading zero bits in each dword element of zmm2/m512/m32bcst using writemask k1. + + + VPLZCNTQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 44 /r + + AVX512VL + AVX512CD + + Count the number of leading zero bits in each qword element of xmm2/m128/m64bcst using writemask k1. 
+ + + VPLZCNTQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 44 /r + + AVX512VL + AVX512CD + + Count the number of leading zero bits in each qword element of ymm2/m256/m64bcst using writemask k1. + + + VPLZCNTQ + zmm1 {k1}{z},zmm2/m512/m64bcst + EVEX.512.66.0F38.W1 44 /r + + AVX512CD + + Count the number of leading zero bits in each qword element of zmm2/m512/m64bcst using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PMADDUBSW--Multiply and Add Packed Integers. + + PMADDUBSW + xmm1,xmm2/m128 + 66 0F 38 04 /r + + SSSE3 + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1. + + + VPMADDUBSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 04 /r + + AVX + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1. + + + VPMADDUBSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 04 /r + + AVX2 + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1. + + + VPMADDUBSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 04 /r + + AVX512VL + AVX512BW + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1 under writemask k1. + + + VPMADDUBSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 04 /r + + AVX512VL + AVX512BW + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to ymm1 under writemask k1. + + + VPMADDUBSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 04 /r + + AVX512BW + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMADDWD--Multiply and Add Packed Integers. + + PMADDWD + xmm1,xmm2/m128 + 66 0F F5 /r + + SSE2 + + Multiply the packed word integers in xmm1 by the packed word integers in xmm2/m128, add adjacent doubleword results, and store in xmm1. + + + VPMADDWD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F F5 /r + + AVX + + Multiply the packed word integers in xmm2 by the packed word integers in xmm3/m128, add adjacent doubleword results, and store in xmm1. + + + VPMADDWD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F F5 /r + + AVX2 + + Multiply the packed word integers in ymm2 by the packed word integers in ymm3/m256, add adjacent doubleword results, and store in ymm1. + + + VPMADDWD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG F5 /r + + AVX512VL + AVX512BW + + Multiply the packed word integers in xmm2 by the packed word integers in xmm3/m128, add adjacent doubleword results, and store in xmm1 under writemask k1. + + + VPMADDWD + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG F5 /r + + AVX512VL + AVX512BW + + Multiply the packed word integers in ymm2 by the packed word integers in ymm3/m256, add adjacent doubleword results, and store in ymm1 under writemask k1. + + + VPMADDWD + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG F5 /r + + AVX512BW + + Multiply the packed word integers in zmm2 by the packed word integers in zmm3/m512, add adjacent doubleword results, and store in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PINSRB/PINSRW/PINSRD/PINSRQ--Insert Integer. 
+ + PINSRB + xmm1,r32/m8,imm8 + 66 0F 3A 20 /r ib + + SSE4_1 + + Insert a byte integer value from r32/m8 into xmm1 at the byte offset in imm8. + + + PINSRW + xmm1,r32/m16,imm8 + 66 0F C4 /r ib + + SSE2 + + Insert a word integer value from r32/m16 into xmm1 at the word offset in imm8. + + + PINSRD + xmm1,r32/m32,imm8 + 66 0F 3A 22 /r ib + + SSE4_1 + + Insert a dword integer value from r32/m32 into xmm1 at the dword offset in imm8. + + + PINSRQ + xmm1,r64/m64,imm8 + 66 REX.W 0F 3A 22 /r ib + + SSE4_1 + + Insert a qword integer value from r64/m64 into xmm1 at the qword offset in imm8. + + + VPINSRB + xmm1,xmm2,r32/m8,imm8 + VEX.NDS.128.66.0F3A 20 /r ib + + AVX + + Merge a byte integer value from r32/m8 and rest from xmm2 into xmm1 at the byte offset in imm8. + + + VPINSRW + xmm1,xmm2,r32/m16,imm8 + VEX.NDS.128.66.0F C4 /r ib + + AVX + + Insert a word integer value from r32/m16 and rest from xmm2 into xmm1 at the word offset in imm8. + + + VPINSRD + xmm1,xmm2,r32/m32,imm8 + VEX.NDS.128.66.0F3A.W0 22 /r ib + + AVX + + Insert a dword integer value from r32/m32 and rest from xmm2 into xmm1 at the dword offset in imm8. + + + VPINSRQ + xmm1,xmm2,r64/m64,imm8 + VEX.NDS.128.66.0F3A.W1 22 /r ib + + AVX + + Insert a qword integer value from r64/m64 and rest from xmm2 into xmm1 at the qword offset in imm8. + + + VPINSRB + xmm1,xmm2,r32/m8,imm8 + EVEX.NDS.128.66.0F3A.WIG 20 /r ib + + AVX512BW + + Merge a byte integer value from r32/m8 and rest from xmm2 into xmm1 at the byte offset in imm8. + + + VPINSRW + xmm1,xmm2,r32/m16,imm8 + EVEX.NDS.128.66.0F.WIG C4 /r ib + + AVX512BW + + Insert a word integer value from r32/m16 and rest from xmm2 into xmm1 at the word offset in imm8. + + + VPINSRD + xmm1,xmm2,r32/m32,imm8 + EVEX.NDS.128.66.0F3A.W0 22 /r ib + + AVX512DQ + + Insert a dword integer value from r32/m32 and rest from xmm2 into xmm1 at the dword offset in imm8. + + + VPINSRQ + xmm1,xmm2,r64/m64,imm8 + EVEX.NDS.128.66.0F3A.W1 22 /r ib + + AVX512DQ + + Insert a qword integer value from r64/m64 and rest from xmm2 into xmm1 at the qword offset in imm8. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VPMADD52LUQ--Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit Products to Qword Accumulators. + + VPMADD52LUQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 B4 /r + + AVX512IFMA + AVX512VL + + Multiply unsigned 52-bit integers in xmm2 and xmm3/m128 and add the low 52 bits of the 104-bit product to the qword unsigned integers in xmm1 using writemask k1. + + + VPMADD52LUQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 B4 /r + + AVX512IFMA + AVX512VL + + Multiply unsigned 52-bit integers in ymm2 and ymm3/m128 and add the low 52 bits of the 104-bit product to the qword unsigned integers in ymm1 using writemask k1. + + + VPMADD52LUQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 B4 /r + + AVX512IFMA + + Multiply unsigned 52-bit integers in zmm2 and zmm3/m128 and add the low 52 bits of the 104-bit product to the qword unsigned integers in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM.r/m(r) + NA + + + + VPMADD52HUQ--Packed Multiply of Unsigned 52-bit Unsigned Integers and Add High 52-bit Products to 64-bit Accumulators'. 
+ + VPMADD52HUQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 B5 /r + + AVX512IFMA + AVX512VL + + Multiply unsigned 52-bit integers in xmm2 and xmm3/m128 and add the high 52 bits of the 104bit product to the qword unsigned integers in xmm1 using writemask k1. + + + VPMADD52HUQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 B5 /r + + AVX512IFMA + AVX512VL + + Multiply unsigned 52-bit integers in ymm2 and ymm3/m128 and add the high 52 bits of the 104bit product to the qword unsigned integers in ymm1 using writemask k1. + + + VPMADD52HUQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 B5 /r + + AVX512IFMA + + Multiply unsigned 52-bit integers in zmm2 and zmm3/m128 and add the high 52 bits of the 104bit product to the qword unsigned integers in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM.r/m(r) + NA + + + + PMAXSB/PMAXSW/PMAXSD/PMAXSQ--Maximum of Packed Signed Integers. + + PMAXSB + xmm1,xmm2/m128 + 66 0F 38 3C /r + + SSE4_1 + + Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + PMAXSW + xmm1,xmm2/m128 + 66 0F EE /r + + SSE2 + + Compare packed signed word integers in xmm2/m128 and xmm1 and stores maximum packed values in xmm1. + + + PMAXSD + xmm1,xmm2/m128 + 66 0F 38 3D /r + + SSE4_1 + + Compare packed signed dword integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + VPMAXSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3C /r + + AVX + + Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EE /r + + AVX + + Compare packed signed word integers in xmm3/m128 and xmm2 and store packed maximum values in xmm1. + + + VPMAXSD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3D /r + + AVX + + Compare packed signed dword integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3C /r + + AVX2 + + Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1. + + + VPMAXSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EE /r + + AVX2 + + Compare packed signed word integers in ymm3/m256 and ymm2 and store packed maximum values in ymm1. + + + VPMAXSD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3D /r + + AVX2 + + Compare packed signed dword integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1. + + + VPMAXSB + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 3C /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1. + + + VPMAXSB + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 3C /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1. + + + VPMAXSB + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 3C /r + + AVX512BW + + Compare packed signed byte integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1. + + + VPMAXSW + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG EE /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1. 
+ + + VPMAXSW + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG EE /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1. + + + VPMAXSW + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG EE /r + + AVX512BW + + Compare packed signed word integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1. + + + VPMAXSD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 3D /r + + AVX512VL + AVX512F + + Compare packed signed dword integers in xmm2 and xmm3/m128/m32bcst and store packed maximum values in xmm1 using writemask k1. + + + VPMAXSD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 3D /r + + AVX512VL + AVX512F + + Compare packed signed dword integers in ymm2 and ymm3/m256/m32bcst and store packed maximum values in ymm1 using writemask k1. + + + VPMAXSD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 3D /r + + AVX512F + + Compare packed signed dword integers in zmm2 and zmm3/m512/m32bcst and store packed maximum values in zmm1 using writemask k1. + + + VPMAXSQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 3D /r + + AVX512VL + AVX512F + + Compare packed signed qword integers in xmm2 and xmm3/m128/m64bcst and store packed maximum values in xmm1 using writemask k1. + + + VPMAXSQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 3D /r + + AVX512VL + AVX512F + + Compare packed signed qword integers in ymm2 and ymm3/m256/m64bcst and store packed maximum values in ymm1 using writemask k1. + + + VPMAXSQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 3D /r + + AVX512F + + Compare packed signed qword integers in zmm2 and zmm3/m512/m64bcst and store packed maximum values in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXUB/PMAXUW--Maximum of Packed Unsigned Integers. + + PMAXUB + xmm1,xmm2/m128 + 66 0F DE /r + + SSE2 + + Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + PMAXUW + xmm1,xmm2/m128 + 66 0F 38 3E/r + + SSE4_1 + + Compare packed unsigned word integers in xmm2/m128 and xmm1 and stores maximum packed values in xmm1. + + + VPMAXUB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F DE /r + + AVX + + Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXUW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 3E/r + + AVX + + Compare packed unsigned word integers in xmm3/m128 and xmm2 and store maximum packed values in xmm1. + + + VPMAXUB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F DE /r + + AVX2 + + Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1. + + + VPMAXUW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 3E/r + + AVX2 + + Compare packed unsigned word integers in ymm3/m256 and ymm2 and store maximum packed values in ymm1. + + + VPMAXUB + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG DE /r + + AVX512VL + AVX512BW + + Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1. 
+ + + VPMAXUB + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG DE /r + + AVX512VL + AVX512BW + + Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1. + + + VPMAXUB + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG DE /r + + AVX512BW + + Compare packed unsigned byte integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1. + + + VPMAXUW + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 3E /r + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1. + + + VPMAXUW + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 3E /r + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1. + + + VPMAXUW + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 3E /r + + AVX512BW + + Compare packed unsigned word integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXUD/PMAXUQ--Maximum of Packed Unsigned Integers. + + PMAXUD + xmm1,xmm2/m128 + 66 0F 38 3F /r + + SSE4_1 + + Compare packed unsigned dword integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + VPMAXUD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3F /r + + AVX + + Compare packed unsigned dword integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXUD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3F /r + + AVX2 + + Compare packed unsigned dword integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1. + + + VPMAXUD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 3F /r + + AVX512VL + AVX512F + + Compare packed unsigned dword integers in xmm2 and xmm3/m128/m32bcst and store packed maximum values in xmm1 under writemask k1. + + + VPMAXUD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 3F /r + + AVX512VL + AVX512F + + Compare packed unsigned dword integers in ymm2 and ymm3/m256/m32bcst and store packed maximum values in ymm1 under writemask k1. + + + VPMAXUD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 3F /r + + AVX512F + + Compare packed unsigned dword integers in zmm2 and zmm3/m512/m32bcst and store packed maximum values in zmm1 under writemask k1. + + + VPMAXUQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 3F /r + + AVX512VL + AVX512F + + Compare packed unsigned qword integers in xmm2 and xmm3/m128/m64bcst and store packed maximum values in xmm1 under writemask k1. + + + VPMAXUQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 3F /r + + AVX512VL + AVX512F + + Compare packed unsigned qword integers in ymm2 and ymm3/m256/m64bcst and store packed maximum values in ymm1 under writemask k1. + + + VPMAXUQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 3F /r + + AVX512F + + Compare packed unsigned qword integers in zmm2 and zmm3/m512/m64bcst and store packed maximum values in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + PMINSB/PMINSW--Minimum of Packed Signed Integers. 
+ + PMINSB + xmm1,xmm2/m128 + 66 0F 38 38 /r + + SSE4_1 + + Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + PMINSW + xmm1,xmm2/m128 + 66 0F EA /r + + SSE2 + + Compare packed signed word integers in xmm2/m128 and xmm1 and store packed minimum values in xmm1. + + + VPMINSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 38 /r + + AVX + + Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F EA /r + + AVX + + Compare packed signed word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1. + + + VPMINSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 38 /r + + AVX2 + + Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1. + + + VPMINSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F EA /r + + AVX2 + + Compare packed signed word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1. + + + VPMINSB + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 38 /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1. + + + VPMINSB + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 38 /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1. + + + VPMINSB + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 38 /r + + AVX512BW + + Compare packed signed byte integers in zmm2 and zmm3/m512 and store packed minimum values in zmm1 under writemask k1. + + + VPMINSW + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG EA /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1. + + + VPMINSW + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG EA /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1. + + + VPMINSW + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG EA /r + + AVX512BW + + Compare packed signed word integers in zmm2 and zmm3/m512 and store packed minimum values in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINSD/PMINSQ--Minimum of Packed Signed Integers. + + PMINSD + xmm1,xmm2/m128 + 66 0F 38 39 /r + + SSE4_1 + + Compare packed signed dword integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + VPMINSD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 39 /r + + AVX + + Compare packed signed dword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINSD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 39 /r + + AVX2 + + Compare packed signed dword integers in ymm2 and ymm3/m128 and store packed minimum values in ymm1. + + + VPMINSD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 39 /r + + AVX512VL + AVX512F + + Compare packed signed dword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1. + + + VPMINSD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 39 /r + + AVX512VL + AVX512F + + Compare packed signed dword integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1. 
+ + + VPMINSD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 39 /r + + AVX512F + + Compare packed signed dword integers in zmm2 and zmm3/m512/m32bcst and store packed minimum values in zmm1 under writemask k1. + + + VPMINSQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 39 /r + + AVX512VL + AVX512F + + Compare packed signed qword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1. + + + VPMINSQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 39 /r + + AVX512VL + AVX512F + + Compare packed signed qword integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1. + + + VPMINSQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 39 /r + + AVX512F + + Compare packed signed qword integers in zmm2 and zmm3/m512/m64bcst and store packed minimum values in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINUB/PMINUW--Minimum of Packed Unsigned Integers. + + PMINUB + xmm1,xmm2/m128 + 66 0F DA /r + + SSE2 + + Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + PMINUW + xmm1,xmm2/m128 + 66 0F 38 3A/r + + SSE4_1 + + Compare packed unsigned word integers in xmm2/m128 and xmm1 and store packed minimum values in xmm1. + + + VPMINUB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F DA /r + + AVX + + Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINUW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 3A/r + + AVX + + Compare packed unsigned word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1. + + + VPMINUB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F DA /r + + AVX2 + + Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1. + + + VPMINUW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 3A/r + + AVX2 + + Compare packed unsigned word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1. + + + VPMINUB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F DA /r + + AVX512VL + AVX512BW + + Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1. + + + VPMINUB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F DA /r + + AVX512VL + AVX512BW + + Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1. + + + VPMINUB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F DA /r + + AVX512BW + + Compare packed unsigned byte integers in zmm2 and zmm3/m512 and store packed minimum values in zmm1 under writemask k1. + + + VPMINUW + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38 3A/r + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1 under writemask k1. + + + VPMINUW + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38 3A/r + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1 under writemask k1. + + + VPMINUW + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38 3A/r + + AVX512BW + + Compare packed unsigned word integers in zmm3/m512 and zmm2 and return packed minimum values in zmm1 under writemask k1. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINUD/PMINUQ--Minimum of Packed Unsigned Integers. + + PMINUD + xmm1,xmm2/m128 + 66 0F 38 3B /r + + SSE4_1 + + Compare packed unsigned dword integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + VPMINUD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3B /r + + AVX + + Compare packed unsigned dword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINUD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3B /r + + AVX2 + + Compare packed unsigned dword integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1. + + + VPMINUD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 3B /r + + AVX512VL + AVX512F + + Compare packed unsigned dword integers in xmm2 and xmm3/m128/m32bcst and store packed minimum values in xmm1 under writemask k1. + + + VPMINUD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 3B /r + + AVX512VL + AVX512F + + Compare packed unsigned dword integers in ymm2 and ymm3/m256/m32bcst and store packed minimum values in ymm1 under writemask k1. + + + VPMINUD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 3B /r + + AVX512F + + Compare packed unsigned dword integers in zmm2 and zmm3/m512/m32bcst and store packed minimum values in zmm1 under writemask k1. + + + VPMINUQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 3B /r + + AVX512VL + AVX512F + + Compare packed unsigned qword integers in xmm2 and xmm3/m128/m64bcst and store packed minimum values in xmm1 under writemask k1. + + + VPMINUQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 3B /r + + AVX512VL + AVX512F + + Compare packed unsigned qword integers in ymm2 and ymm3/m256/m64bcst and store packed minimum values in ymm1 under writemask k1. + + + VPMINUQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 3B /r + + AVX512F + + Compare packed unsigned qword integers in zmm2 and zmm3/m512/m64bcst and store packed minimum values in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPMOVM2B/VPMOVM2W/VPMOVM2D/VPMOVM2Q--Convert a Mask Register to a Vector Register. + + VPMOVM2B + xmm1,k1 + EVEX.128.F3.0F38.W0 28 /r + + AVX512VL + AVX512BW + + Sets each byte in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2B + ymm1,k1 + EVEX.256.F3.0F38.W0 28 /r + + AVX512VL + AVX512BW + + Sets each byte in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2B + zmm1,k1 + EVEX.512.F3.0F38.W0 28 /r + + AVX512BW + + Sets each byte in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2W + xmm1,k1 + EVEX.128.F3.0F38.W1 28 /r + + AVX512VL + AVX512BW + + Sets each word in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2W + ymm1,k1 + EVEX.256.F3.0F38.W1 28 /r + + AVX512VL + AVX512BW + + Sets each word in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2W + zmm1,k1 + EVEX.512.F3.0F38.W1 28 /r + + AVX512BW + + Sets each word in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. 
+ + + VPMOVM2D + xmm1,k1 + EVEX.128.F3.0F38.W0 38 /r + + AVX512VL + AVX512DQ + + Sets each doubleword in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2D + ymm1,k1 + EVEX.256.F3.0F38.W0 38 /r + + AVX512VL + AVX512DQ + + Sets each doubleword in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2D + zmm1,k1 + EVEX.512.F3.0F38.W0 38 /r + + AVX512DQ + + Sets each doubleword in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2Q + xmm1,k1 + EVEX.128.F3.0F38.W1 38 /r + + AVX512VL + AVX512DQ + + Sets each quadword in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2Q + ymm1,k1 + EVEX.256.F3.0F38.W1 38 /r + + AVX512VL + AVX512DQ + + Sets each quadword in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2Q + zmm1,k1 + EVEX.512.F3.0F38.W1 38 /r + + AVX512DQ + + Sets each quadword in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPMOVB2M/VPMOVW2M/VPMOVD2M/VPMOVQ2M--Convert a Vector Register to a Mask. + + VPMOVB2M + k1,xmm1 + EVEX.128.F3.0F38.W0 29 /r + + AVX512VL + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding byte in XMM1. + + + VPMOVB2M + k1,ymm1 + EVEX.256.F3.0F38.W0 29 /r + + AVX512VL + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding byte in YMM1. + + + VPMOVB2M + k1,zmm1 + EVEX.512.F3.0F38.W0 29 /r + + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding byte in ZMM1. + + + VPMOVW2M + k1,xmm1 + EVEX.128.F3.0F38.W1 29 /r + + AVX512VL + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding word in XMM1. + + + VPMOVW2M + k1,ymm1 + EVEX.256.F3.0F38.W1 29 /r + + AVX512VL + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding word in YMM1. + + + VPMOVW2M + k1,zmm1 + EVEX.512.F3.0F38.W1 29 /r + + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding word in ZMM1. + + + VPMOVD2M + k1,xmm1 + EVEX.128.F3.0F38.W0 39 /r + + AVX512VL + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding doubleword in XMM1. + + + VPMOVD2M + k1,ymm1 + EVEX.256.F3.0F38.W0 39 /r + + AVX512VL + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding doubleword in YMM1. + + + VPMOVD2M + k1,zmm1 + EVEX.512.F3.0F38.W0 39 /r + + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding doubleword in ZMM1. + + + VPMOVQ2M + k1,xmm1 + EVEX.128.F3.0F38.W1 39 /r + + AVX512VL + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding quadword in XMM1. + + + VPMOVQ2M + k1,ymm1 + EVEX.256.F3.0F38.W1 39 /r + + AVX512VL + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding quadword in YMM1. + + + VPMOVQ2M + k1,zmm1 + EVEX.512.F3.0F38.W1 39 /r + + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding quadword in ZMM1. 
+ + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPMOVQB/VPMOVSQB/VPMOVUSQB--Down Convert QWord to Byte. + + VPMOVQB + xmm1/m16 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 32 /r + + AVX512VL + AVX512F + + Converts 2 packed quad-word integers from xmm2 into 2 packed byte integers in xmm1/m16 with truncation under writemask k1. + + + VPMOVSQB + xmm1/m16 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 22 /r + + AVX512VL + AVX512F + + Converts 2 packed signed quad-word integers from xmm2 into 2 packed signed byte integers in xmm1/m16 using signed saturation under writemask k1. + + + VPMOVUSQB + xmm1/m16 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 12 /r + + AVX512VL + AVX512F + + Converts 2 packed unsigned quad-word integers from xmm2 into 2 packed unsigned byte integers in xmm1/m16 using unsigned saturation under writemask k1. + + + VPMOVQB + xmm1/m32 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 32 /r + + AVX512VL + AVX512F + + Converts 4 packed quad-word integers from ymm2 into 4 packed byte integers in xmm1/m32 with truncation under writemask k1. + + + VPMOVSQB + xmm1/m32 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 22 /r + + AVX512VL + AVX512F + + Converts 4 packed signed quad-word integers from ymm2 into 4 packed signed byte integers in xmm1/m32 using signed saturation under writemask k1. + + + VPMOVUSQB + xmm1/m32 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 12 /r + + AVX512VL + AVX512F + + Converts 4 packed unsigned quad-word integers from ymm2 into 4 packed unsigned byte integers in xmm1/m32 using unsigned saturation under writemask k1. + + + VPMOVQB + xmm1/m64 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 32 /r + + AVX512F + + Converts 8 packed quad-word integers from zmm2 into 8 packed byte integers in xmm1/m64 with truncation under writemask k1. + + + VPMOVSQB + xmm1/m64 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 22 /r + + AVX512F + + Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed byte integers in xmm1/m64 using signed saturation under writemask k1. + + + VPMOVUSQB + xmm1/m64 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 12 /r + + AVX512F + + Converts 8 packed unsigned quad-word integers from zmm2 into 8 packed unsigned byte integers in xmm1/m64 using unsigned saturation under writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPMOVQW/VPMOVSQW/VPMOVUSQW--Down Convert QWord to Word. + + VPMOVQW + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 34 /r + + AVX512VL + AVX512F + + Converts 2 packed quad-word integers from xmm2 into 2 packed word integers in xmm1/m32 with truncation under writemask k1. + + + VPMOVSQW + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 24 /r + + AVX512VL + AVX512F + + Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed word integers in xmm1/m32 using signed saturation under writemask k1. + + + VPMOVUSQW + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 14 /r + + AVX512VL + AVX512F + + Converts 2 packed unsigned quad-word integers from xmm2 into 2 packed unsigned word integers in xmm1/m32 using unsigned saturation under writemask k1. + + + VPMOVQW + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 34 /r + + AVX512VL + AVX512F + + Converts 4 packed quad-word integers from ymm2 into 4 packed word integers in xmm1/m64 with truncation under writemask k1. + + + VPMOVSQW + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 24 /r + + AVX512VL + AVX512F + + Converts 4 packed signed quad-word integers from ymm2 into 4 packed signed word integers in xmm1/m64 using signed saturation under writemask k1. 
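The EVEX-encoded forms listed above all spell their encoding as a dotted field string followed by the opcode byte and an operand marker, e.g. EVEX.128.F3.0F38.W0 32 /r (vector length, mandatory prefix, opcode map, EVEX.W handling). As a rough illustration of how such a string can be split into fields, here is a minimal Python sketch; the field names and the parse_evex_opcode helper are assumptions made for this example only and are not part of this patch or of genc.py.

import re

# Illustrative pattern for EVEX opcode strings such as
# "EVEX.128.F3.0F38.W0 32 /r" or "EVEX.NDS.512.66.0F38.W1 28 /r".
EVEX_RE = re.compile(
    r"^EVEX\."
    r"(?:(?P<vvvv>NDS|NDD|DDS)\.)?"   # optional vvvv usage hint
    r"(?P<length>128|256|512|LIG)\."  # vector length
    r"(?:(?P<prefix>66|F2|F3)\.)?"    # mandatory SIMD prefix, if any
    r"(?P<map>0F38|0F3A|0F)\."        # opcode map
    r"(?P<w>W0|W1|WIG)"               # EVEX.W handling
    r"\s+(?P<opcode>[0-9A-F]{2})"     # primary opcode byte
    r"\s*(?P<modrm>/r)?"              # ModRM register form, if present
)

def parse_evex_opcode(text):
    m = EVEX_RE.match(text)
    if m is None:
        raise ValueError(f"not an EVEX opcode string: {text!r}")
    return m.groupdict()

if __name__ == "__main__":
    print(parse_evex_opcode("EVEX.128.F3.0F38.W0 32 /r"))
    print(parse_evex_opcode("EVEX.NDS.512.66.0F38.W1 28 /r"))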
+ + + VPMOVUSQW + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 14 /r + + AVX512VL + AVX512F + + Converts 4 packed unsigned quad-word integers from ymm2 into 4 packed unsigned word integers in xmm1/m64 using unsigned saturation under writemask k1. + + + VPMOVQW + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 34 /r + + AVX512F + + Converts 8 packed quad-word integers from zmm2 into 8 packed word integers in xmm1/m128 with truncation under writemask k1. + + + VPMOVSQW + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 24 /r + + AVX512F + + Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed word integers in xmm1/m128 using signed saturation under writemask k1. + + + VPMOVUSQW + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 14 /r + + AVX512F + + Converts 8 packed unsigned quad-word integers from zmm2 into 8 packed unsigned word integers in xmm1/m128 using unsigned saturation under writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPMOVQD/VPMOVSQD/VPMOVUSQD--Down Convert QWord to DWord. + + VPMOVQD + xmm1/m128 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 35 /r + + AVX512VL + AVX512F + + Converts 2 packed quad-word integers from xmm2 into 2 packed double-word integers in xmm1/m128 with truncation subject to writemask k1. + + + VPMOVSQD + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 25 /r + + AVX512VL + AVX512F + + Converts 2 packed signed quad-word integers from xmm2 into 2 packed signed double-word integers in xmm1/m64 using signed saturation subject to writemask k1. + + + VPMOVUSQD + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 15 /r + + AVX512VL + AVX512F + + Converts 2 packed unsigned quad-word integers from xmm2 into 2 packed unsigned double-word integers in xmm1/m64 using unsigned saturation subject to writemask k1. + + + VPMOVQD + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 35 /r + + AVX512VL + AVX512F + + Converts 4 packed quad-word integers from ymm2 into 4 packed double-word integers in xmm1/m128 with truncation subject to writemask k1. + + + VPMOVSQD + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 25 /r + + AVX512VL + AVX512F + + Converts 4 packed signed quad-word integers from ymm2 into 4 packed signed double-word integers in xmm1/m128 using signed saturation subject to writemask k1. + + + VPMOVUSQD + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 15 /r + + AVX512VL + AVX512F + + Converts 4 packed unsigned quad-word integers from ymm2 into 4 packed unsigned double-word integers in xmm1/m128 using unsigned saturation subject to writemask k1. + + + VPMOVQD + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 35 /r + + AVX512F + + Converts 8 packed quad-word integers from zmm2 into 8 packed double-word integers in ymm1/m256 with truncation subject to writemask k1. + + + VPMOVSQD + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 25 /r + + AVX512F + + Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed double-word integers in ymm1/m256 using signed saturation subject to writemask k1. + + + VPMOVUSQD + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 15 /r + + AVX512F + + Converts 8 packed unsigned quad-word integers from zmm2 into 8 packed unsigned double-word integers in ymm1/m256 using unsigned saturation subject to writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPMOVDB/VPMOVSDB/VPMOVUSDB--Down Convert DWord to Byte. + + VPMOVDB + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 31 /r + + AVX512VL + AVX512F + + Converts 4 packed double-word integers from xmm2 into 4 packed byte integers in xmm1/m32 with truncation under writemask k1. 
+ + + VPMOVSDB + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 21 /r + + AVX512VL + AVX512F + + Converts 4 packed signed double-word integers from xmm2 into 4 packed signed byte integers in xmm1/m32 using signed saturation under writemask k1. + + + VPMOVUSDB + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 11 /r + + AVX512VL + AVX512F + + Converts 4 packed unsigned double-word integers from xmm2 into 4 packed unsigned byte integers in xmm1/m32 using unsigned saturation under writemask k1. + + + VPMOVDB + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 31 /r + + AVX512VL + AVX512F + + Converts 8 packed double-word integers from ymm2 into 8 packed byte integers in xmm1/m64 with truncation under writemask k1. + + + VPMOVSDB + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 21 /r + + AVX512VL + AVX512F + + Converts 8 packed signed double-word integers from ymm2 into 8 packed signed byte integers in xmm1/m64 using signed saturation under writemask k1. + + + VPMOVUSDB + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 11 /r + + AVX512VL + AVX512F + + Converts 8 packed unsigned double-word integers from ymm2 into 8 packed unsigned byte integers in xmm1/m64 using unsigned saturation under writemask k1. + + + VPMOVDB + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 31 /r + + AVX512F + + Converts 16 packed double-word integers from zmm2 into 16 packed byte integers in xmm1/m128 with truncation under writemask k1. + + + VPMOVSDB + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 21 /r + + AVX512F + + Converts 16 packed signed double-word integers from zmm2 into 16 packed signed byte integers in xmm1/m128 using signed saturation under writemask k1. + + + VPMOVUSDB + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 11 /r + + AVX512F + + Converts 16 packed unsigned double-word integers from zmm2 into 16 packed unsigned byte integers in xmm1/m128 using unsigned saturation under writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPMOVDW/VPMOVSDW/VPMOVUSDW--Down Convert DWord to Word. + + VPMOVDW + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 33 /r + + AVX512VL + AVX512F + + Converts 4 packed double-word integers from xmm2 into 4 packed word integers in xmm1/m64 with truncation under writemask k1. + + + VPMOVSDW + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 23 /r + + AVX512VL + AVX512F + + Converts 4 packed signed double-word integers from xmm2 into 4 packed signed word integers in ymm1/m64 using signed saturation under writemask k1. + + + VPMOVUSDW + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 13 /r + + AVX512VL + AVX512F + + Converts 4 packed unsigned double-word integers from xmm2 into 4 packed unsigned word integers in xmm1/m64 using unsigned saturation under writemask k1. + + + VPMOVDW + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 33 /r + + AVX512VL + AVX512F + + Converts 8 packed double-word integers from ymm2 into 8 packed word integers in xmm1/m128 with truncation under writemask k1. + + + VPMOVSDW + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 23 /r + + AVX512VL + AVX512F + + Converts 8 packed signed double-word integers from ymm2 into 8 packed signed word integers in xmm1/m128 using signed saturation under writemask k1. + + + VPMOVUSDW + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 13 /r + + AVX512VL + AVX512F + + Converts 8 packed unsigned double-word integers from ymm2 into 8 packed unsigned word integers in xmm1/m128 using unsigned saturation under writemask k1. 
+ + + VPMOVDW + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 33 /r + + AVX512F + + Converts 16 packed double-word integers from zmm2 into 16 packed word integers in ymm1/m256 with truncation under writemask k1. + + + VPMOVSDW + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 23 /r + + AVX512F + + Converts 16 packed signed double-word integers from zmm2 into 16 packed signed word integers in ymm1/m256 using signed saturation under writemask k1. + + + VPMOVUSDW + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 13 /r + + AVX512F + + Converts 16 packed unsigned double-word integers from zmm2 into 16 packed unsigned word integers in ymm1/m256 using unsigned saturation under writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPMOVWB/VPMOVSWB/VPMOVUSWB--Down Convert Word to Byte. + + VPMOVWB + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 30 /r + + AVX512VL + AVX512BW + + Converts 8 packed word integers from xmm2 into 8 packed bytes in xmm1/m64 with truncation under writemask k1. + + + VPMOVSWB + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 20 /r + + AVX512VL + AVX512BW + + Converts 8 packed signed word integers from xmm2 into 8 packed signed bytes in xmm1/m64 using signed saturation under writemask k1. + + + VPMOVUSWB + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 10 /r + + AVX512VL + AVX512BW + + Converts 8 packed unsigned word integers from xmm2 into 8 packed unsigned bytes in 8mm1/m64 using unsigned saturation under writemask k1. + + + VPMOVWB + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 30 /r + + AVX512VL + AVX512BW + + Converts 16 packed word integers from ymm2 into 16 packed bytes in xmm1/m128 with truncation under writemask k1. + + + VPMOVSWB + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 20 /r + + AVX512VL + AVX512BW + + Converts 16 packed signed word integers from ymm2 into 16 packed signed bytes in xmm1/m128 using signed saturation under writemask k1. + + + VPMOVUSWB + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 10 /r + + AVX512VL + AVX512BW + + Converts 16 packed unsigned word integers from ymm2 into 16 packed unsigned bytes in xmm1/m128 using unsigned saturation under writemask k1. + + + VPMOVWB + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 30 /r + + AVX512BW + + Converts 32 packed word integers from zmm2 into 32 packed bytes in ymm1/m256 with truncation under writemask k1. + + + VPMOVSWB + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 20 /r + + AVX512BW + + Converts 32 packed signed word integers from zmm2 into 32 packed signed bytes in ymm1/m256 using signed saturation under writemask k1. + + + VPMOVUSWB + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 10 /r + + AVX512BW + + Converts 32 packed unsigned word integers from zmm2 into 32 packed unsigned bytes in ymm1/m256 using unsigned saturation under writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + PMOVSX--Packed Move with Sign Extend. + + PMOVSXBW + xmm1,xmm2/m64 + 66 0f 38 20 /r + + SSE4_1 + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + PMOVSXBD + xmm1,xmm2/m32 + 66 0f 38 21 /r + + SSE4_1 + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. + + + PMOVSXBQ + xmm1,xmm2/m16 + 66 0f 38 22 /r + + SSE4_1 + + Sign extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + PMOVSXWD + xmm1,xmm2/m64 + 66 0f 38 23/r + + SSE4_1 + + Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. 
+ + + PMOVSXWQ + xmm1,xmm2/m3 + 66 0f 38 24 /r + + SSE4_1 + + Sign extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + PMOVSXDQ + xmm1,xmm2/m64 + 66 0f 38 25 /r + + SSE4_1 + + Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXBW + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 20 /r + + AVX + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + VPMOVSXBD + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 21 /r + + AVX + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. + + + VPMOVSXBQ + xmm1,xmm2/m16 + VEX.128.66.0F38.WIG 22 /r + + AVX + + Sign extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXWD + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 23 /r + + AVX + + Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. + + + VPMOVSXWQ + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 24 /r + + AVX + + Sign extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXDQ + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 25 /r + + AVX + + Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXBW + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 20 /r + + AVX2 + + Sign extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1. + + + VPMOVSXBD + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 21 /r + + AVX2 + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1. + + + VPMOVSXBQ + ymm1,xmm2/m32 + VEX.256.66.0F38.WIG 22 /r + + AVX2 + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1. + + + VPMOVSXWD + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 23 /r + + AVX2 + + Sign extend 8 packed 16-bit integers in the low 16 bytes of xmm2/m128 to 8 packed 32-bit integers in ymm1. + + + VPMOVSXWQ + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 24 /r + + AVX2 + + Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1. + + + VPMOVSXDQ + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 25 /r + + AVX2 + + Sign extend 4 packed 32-bit integers in the low 16 bytes of xmm2/m128 to 4 packed 64-bit integers in ymm1. + + + VPMOVSXBW + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.WIG 20 /r + + AVX512VL + AVX512BW + + Sign extend 8 packed 8-bit integers in xmm2/m64 to 8 packed 16-bit integers in zmm1. + + + VPMOVSXBW + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.WIG 20 /r + + AVX512VL + AVX512BW + + Sign extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1. + + + VPMOVSXBW + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.WIG 20 /r + + AVX512BW + + Sign extend 32 packed 8-bit integers in ymm2/m256 to 32 packed 16-bit integers in zmm1. + + + VPMOVSXBD + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.WIG 21 /r + + AVX512VL + AVX512F + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1 subject to writemask k1. + + + VPMOVSXBD + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.WIG 21 /r + + AVX512VL + AVX512F + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1 subject to writemask k1. 
+ + + VPMOVSXBD + zmm1 {k1}{z},xmm2/m128 + EVEX.512.66.0F38.WIG 21 /r + + AVX512F + + Sign extend 16 packed 8-bit integers in the low 16 bytes of xmm2/m128 to 16 packed 32-bit integers in zmm1 subject to writemask k1. + + + VPMOVSXBQ + xmm1 {k1}{z},xmm2/m16 + EVEX.128.66.0F38.WIG 22 /r + + AVX512VL + AVX512F + + Sign extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1 subject to writemask k1. + + + VPMOVSXBQ + ymm1 {k1}{z},xmm2/m32 + EVEX.256.66.0F38.WIG 22 /r + + AVX512VL + AVX512F + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1 subject to writemask k1. + + + VPMOVSXBQ + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.WIG 22 /r + + AVX512F + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 64-bit integers in zmm1 subject to writemask k1. + + + VPMOVSXWD + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.WIG 23 /r + + AVX512VL + AVX512F + + Sign extend 4 packed 16-bit integers in the low 8 bytes of ymm2/mem to 4 packed 32-bit integers in xmm1 subject to writemask k1. + + + VPMOVSXWD + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.WIG 23 /r + + AVX512VL + AVX512F + + Sign extend 8 packed 16-bit integers in the low 16 bytes of ymm2/m128 to 8 packed 32-bit integers in ymm1 subject to writemask k1. + + + VPMOVSXWD + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.WIG 23 /r + + AVX512F + + Sign extend 16 packed 16-bit integers in the low 32 bytes of ymm2/m256 to 16 packed 32-bit integers in zmm1 subject to writemask k1. + + + VPMOVSXWQ + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.WIG 24 /r + + AVX512VL + AVX512F + + Sign extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1 subject to writemask k1. + + + VPMOVSXWQ + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.WIG 24 /r + + AVX512VL + AVX512F + + Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1 subject to writemask k1. + + + VPMOVSXWQ + zmm1 {k1}{z},xmm2/m128 + EVEX.512.66.0F38.WIG 24 /r + + AVX512F + + Sign extend 8 packed 16-bit integers in the low 16 bytes of xmm2/m128 to 8 packed 64-bit integers in zmm1 subject to writemask k1. + + + VPMOVSXDQ + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.W0 25 /r + + AVX512VL + AVX512F + + Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in zmm1 using writemask k1. + + + VPMOVSXDQ + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.W0 25 /r + + AVX512VL + AVX512F + + Sign extend 4 packed 32-bit integers in the low 16 bytes of xmm2/m128 to 4 packed 64-bit integers in zmm1 using writemask k1. + + + VPMOVSXDQ + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.W0 25 /r + + AVX512F + + Sign extend 8 packed 32-bit integers in the low 32 bytes of ymm2/m256 to 8 packed 64-bit integers in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PMOVZX--Packed Move with Zero Extend. + + PMOVZXBW + xmm1,xmm2/m64 + 66 0f 38 30 /r + + SSE4_1 + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + PMOVZXBD + xmm1,xmm2/m32 + 66 0f 38 31 /r + + SSE4_1 + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. 
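Every instruction group in this table closes with operand-encoding rows such as ModRM:reg(w), EVEX.vvvv(r), ModRM:r/m(r), NA, which state where each operand lives in the encoding and whether it is read or written. The following is a small, self-contained Python sketch of turning one such row into structured operand descriptors; the OperandSlot type and the parse_encoding_row helper are illustrative assumptions, not types defined anywhere in this patch.

import re
from dataclasses import dataclass
from typing import Optional

@dataclass
class OperandSlot:
    source: str      # e.g. "ModRM:reg", "EVEX.vvvv", "Imm8"
    read: bool
    written: bool

# "ModRM:reg(r,w)" -> encoding source plus access flags; "NA" marks an unused slot.
SLOT_RE = re.compile(r"^(?P<src>[^()]+?)\s*(?:\((?P<acc>[rwRW,]+)\))?$")

def parse_slot(cell: str) -> Optional[OperandSlot]:
    cell = cell.strip()
    if cell == "NA":
        return None
    m = SLOT_RE.match(cell)
    acc = (m.group("acc") or "").lower()
    # Cells without an explicit access annotation (e.g. a bare "Imm8")
    # simply keep both flags False in this sketch.
    return OperandSlot(source=m.group("src"), read="r" in acc, written="w" in acc)

def parse_encoding_row(cells):
    return [parse_slot(c) for c in cells]

if __name__ == "__main__":
    for slot in parse_encoding_row(["ModRM:reg(w)", "EVEX.vvvv(r)", "ModRM:r/m(r)", "NA"]):
        print(slot)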
+ + + PMOVZXBQ + xmm1,xmm2/m16 + 66 0f 38 32 /r + + SSE4_1 + + Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + PMOVZXWD + xmm1,xmm2/m64 + 66 0f 38 33 /r + + SSE4_1 + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. + + + PMOVZXWQ + xmm1,xmm2/m32 + 66 0f 38 34 /r + + SSE4_1 + + Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + PMOVZXDQ + xmm1,xmm2/m64 + 66 0f 38 35 /r + + SSE4_1 + + Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXBW + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 30 /r + + AVX + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + VPMOVZXBD + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 31 /r + + AVX + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. + + + VPMOVZXBQ + xmm1,xmm2/m16 + VEX.128.66.0F38.WIG 32 /r + + AVX + + Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXWD + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 33 /r + + AVX + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. + + + VPMOVZXWQ + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 34 /r + + AVX + + Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXDQ + xmm1,xmm2/m64 + VEX.128.66.0F 38.WIG 35 /r + + AVX + + Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXBW + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 30 /r + + AVX2 + + Zero extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1. + + + VPMOVZXBD + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 31 /r + + AVX2 + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1. + + + VPMOVZXBQ + ymm1,xmm2/m32 + VEX.256.66.0F38.WIG 32 /r + + AVX2 + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1. + + + VPMOVZXWD + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 33 /r + + AVX2 + + Zero extend 8 packed 16-bit integers xmm2/m128 to 8 packed 32-bit integers in ymm1. + + + VPMOVZXWQ + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 34 /r + + AVX2 + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in xmm1. + + + VPMOVZXDQ + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 35 /r + + AVX2 + + Zero extend 4 packed 32-bit integers in xmm2/m128 to 4 packed 64-bit integers in ymm1. + + + VPMOVZXBW + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38 30.WIG /r + + AVX512VL + AVX512BW + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + VPMOVZXBW + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.WIG 30 /r + + AVX512VL + AVX512BW + + Zero extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1. + + + VPMOVZXBW + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.WIG 30 /r + + AVX512BW + + Zero extend 32 packed 8-bit integers in ymm2/m256 to 32 packed 16-bit integers in zmm1. + + + VPMOVZXBD + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.WIG 31 /r + + AVX512VL + AVX512F + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1 subject to writemask k1. 
+ + + VPMOVZXBD + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.WIG 31 /r + + AVX512VL + AVX512F + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1 subject to writemask k1. + + + VPMOVZXBD + zmm1 {k1}{z},xmm2/m128 + EVEX.512.66.0F38.WIG 31 /r + + AVX512F + + Zero extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 32-bit integers in zmm1 subject to writemask k1. + + + VPMOVZXBQ + xmm1 {k1}{z},xmm2/m16 + EVEX.128.66.0F38.WIG 32 /r + + AVX512VL + AVX512F + + Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1 subject to writemask k1. + + + VPMOVZXBQ + ymm1 {k1}{z},xmm2/m32 + EVEX.256.66.0F38.WIG 32 /r + + AVX512VL + AVX512F + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1 subject to writemask k1. + + + VPMOVZXBQ + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.WIG 32 /r + + AVX512F + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 64-bit integers in zmm1 subject to writemask k1. + + + VPMOVZXWD + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.WIG 33 /r + + AVX512VL + AVX512F + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1 subject to writemask k1. + + + VPMOVZXWD + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.WIG 33 /r + + AVX512VL + AVX512F + + Zero extend 8 packed 16-bit integers in xmm2/m128 to 8 packed 32-bit integers in zmm1 subject to writemask k1. + + + VPMOVZXWD + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.WIG 33 /r + + AVX512F + + Zero extend 16 packed 16-bit integers in ymm2/m256 to 16 packed 32-bit integers in zmm1 subject to writemask k1. + + + VPMOVZXWQ + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.WIG 34 /r + + AVX512VL + AVX512F + + Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1 subject to writemask k1. + + + VPMOVZXWQ + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.WIG 34 /r + + AVX512VL + AVX512F + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1 subject to writemask k1. + + + VPMOVZXWQ + zmm1 {k1}{z},xmm2/m128 + EVEX.512.66.0F38.WIG 34 /r + + AVX512F + + Zero extend 8 packed 16-bit integers in xmm2/m128 to 8 packed 64-bit integers in zmm1 subject to writemask k1. + + + VPMOVZXDQ + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.W0 35 /r + + AVX512VL + AVX512F + + Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in zmm1 using writemask k1. + + + VPMOVZXDQ + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.W0 35 /r + + AVX512VL + AVX512F + + Zero extend 4 packed 32-bit integers in xmm2/m128 to 4 packed 64-bit integers in zmm1 using writemask k1. + + + VPMOVZXDQ + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.W0 35 /r + + AVX512F + + Zero extend 8 packed 32-bit integers in ymm2/m256 to 8 packed 64-bit integers in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PMULDQ--Multiply Packed Doubleword Integers. + + PMULDQ + xmm1,xmm2/m128 + 66 0F 38 28 /r + + SSE4_1 + + Multiply packed signed doubleword integers in xmm1 by packed signed doubleword integers in xmm2/m128, and store the quadword results in xmm1. 
+ + + VPMULDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 28 /r + + AVX + + Multiply packed signed doubleword integers in xmm2 by packed signed doubleword integers in xmm3/m128, and store the quadword results in xmm1. + + + VPMULDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 28 /r + + AVX2 + + Multiply packed signed doubleword integers in ymm2 by packed signed doubleword integers in ymm3/m256, and store the quadword results in ymm1. + + + VPMULDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 28 /r + + AVX512VL + AVX512F + + Multiply packed signed doubleword integers in xmm2 by packed signed doubleword integers in xmm3/m128/m64bcst, and store the quadword results in xmm1 using writemask k1. + + + VPMULDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 28 /r + + AVX512VL + AVX512F + + Multiply packed signed doubleword integers in ymm2 by packed signed doubleword integers in ymm3/m256/m64bcst, and store the quadword results in ymm1 using writemask k1. + + + VPMULDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 28 /r + + AVX512F + + Multiply packed signed doubleword integers in zmm2 by packed signed doubleword integers in zmm3/m512/m64bcst, and store the quadword results in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULHRSW--Multiply Packed Unsigned Integers with Round and Scale. + + PMULHRSW + xmm1,xmm2/m128 + 66 0F 38 0B /r + + SSSE3 + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1. + + + VPMULHRSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 0B /r + + AVX + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1. + + + VPMULHRSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 0B /r + + AVX2 + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to ymm1. + + + VPMULHRSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 0B /r + + AVX512VL + AVX512BW + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1 under writemask k1. + + + VPMULHRSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 0B /r + + AVX512VL + AVX512BW + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to ymm1 under writemask k1. + + + VPMULHRSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 0B /r + + AVX512BW + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULHUW--Multiply Packed Unsigned Integers and Store High Result. + + PMULHUW + xmm1,xmm2/m128 + 66 0F E4 /r + + SSE2 + + Multiply the packed unsigned word integers in xmm1 and xmm2/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHUW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E4 /r + + AVX + + Multiply the packed unsigned word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHUW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E4 /r + + AVX2 + + Multiply the packed unsigned word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1. 
+ + + VPMULHUW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E4 /r + + AVX512VL + AVX512BW + + Multiply the packed unsigned word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1 under writemask k1. + + + VPMULHUW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E4 /r + + AVX512VL + AVX512BW + + Multiply the packed unsigned word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1 under writemask k1. + + + VPMULHUW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E4 /r + + AVX512BW + + Multiply the packed unsigned word integers in zmm2 and zmm3/m512, and store the high 16 bits of the results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULHW--Multiply Packed Integers and Store High Result. + + PMULHW + xmm1,xmm2/m128 + 66 0F E5 /r + + SSE2 + + Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E5 /r + + AVX + + Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E5 /r + + AVX2 + + Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1. + + + VPMULHW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E5 /r + + AVX512VL + AVX512BW + + Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1 under writemask k1. + + + VPMULHW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E5 /r + + AVX512VL + AVX512BW + + Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1 under writemask k1. + + + VPMULHW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E5 /r + + AVX512BW + + Multiply the packed signed word integers in zmm2 and zmm3/m512, and store the high 16 bits of the results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULLD/PMULLQ--Multiply Packed Integers and Store Low Result. + + PMULLD + xmm1,xmm2/m128 + 66 0F 38 40 /r + + SSE4_1 + + Multiply the packed dword signed integers in xmm1 and xmm2/m128 and store the low 32 bits of each product in xmm1. + + + VPMULLD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 40 /r + + AVX + + Multiply the packed dword signed integers in xmm2 and xmm3/m128 and store the low 32 bits of each product in xmm1. + + + VPMULLD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 40 /r + + AVX2 + + Multiply the packed dword signed integers in ymm2 and ymm3/m256 and store the low 32 bits of each product in ymm1. + + + VPMULLD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 40 /r + + AVX512VL + AVX512F + + Multiply the packed dword signed integers in xmm2 and xmm3/m128/m32bcst and store the low 32 bits of each product in xmm1 under writemask k1. + + + VPMULLD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 40 /r + + AVX512VL + AVX512F + + Multiply the packed dword signed integers in ymm2 and ymm3/m256/m32bcst and store the low 32 bits of each product in ymm1 under writemask k1. 
+ + + VPMULLD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 40 /r + + AVX512F + + Multiply the packed dword signed integers in zmm2 and zmm3/m512/m32bcst and store the low 32 bits of each product in zmm1 under writemask k1. + + + VPMULLQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 40 /r + + AVX512VL + AVX512DQ + + Multiply the packed qword signed integers in xmm2 and xmm3/m128/m64bcst and store the low 64 bits of each product in xmm1 under writemask k1. + + + VPMULLQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 40 /r + + AVX512VL + AVX512DQ + + Multiply the packed qword signed integers in ymm2 and ymm3/m256/m64bcst and store the low 64 bits of each product in ymm1 under writemask k1. + + + VPMULLQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 40 /r + + AVX512DQ + + Multiply the packed qword signed integers in zmm2 and zmm3/m512/m64bcst and store the low 64 bits of each product in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULLW--Multiply Packed Integers and Store Low Result. + + PMULLW + xmm1,xmm2/m128 + 66 0F D5 /r + + SSE2 + + Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of the results in xmm1. + + + VPMULLW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F D5 /r + + AVX + + Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the low 16 bits of the results in xmm1. + + + VPMULLW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F D5 /r + + AVX2 + + Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the low 16 bits of the results in ymm1. + + + VPMULLW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG D5 /r + + AVX512VL + AVX512BW + + Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the low 16 bits of the results in xmm1 under writemask k1. + + + VPMULLW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG D5 /r + + AVX512VL + AVX512BW + + Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the low 16 bits of the results in ymm1 under writemask k1. + + + VPMULLW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG D5 /r + + AVX512BW + + Multiply the packed signed word integers in zmm2 and zmm3/m512, and store the low 16 bits of the results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPMULTISHIFTQB--Select Packed Unaligned Bytes from Quadword Sources. + + VPMULTISHIFTQB + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 83 /r + + AVX512VBMI + AVX512VL + + Select unaligned bytes from qwords in xmm3/m128/m64bcst using control bytes in xmm2, write byte results to xmm1 under k1. + + + VPMULTISHIFTQB + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 83 /r + + AVX512VBMI + AVX512VL + + Select unaligned bytes from qwords in ymm3/m256/m64bcst using control bytes in ymm2, write byte results to ymm1 under k1. + + + VPMULTISHIFTQB + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 83 /r + + AVX512VBMI + + Select unaligned bytes from qwords in zmm3/m512/m64bcst using control bytes in zmm2, write byte results to zmm1 under k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULUDQ--Multiply Packed Unsigned Doubleword Integers. 
+ + PMULUDQ + xmm1,xmm2/m128 + 66 0F F4 /r + + SSE4_1 + + Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers in xmm2/m128, and store the quadword results in xmm1. + + + VPMULUDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F4 /r + + AVX + + Multiply packed unsigned doubleword integers in xmm2 by packed unsigned doubleword integers in xmm3/m128, and store the quadword results in xmm1. + + + VPMULUDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F4 /r + + AVX2 + + Multiply packed unsigned doubleword integers in ymm2 by packed unsigned doubleword integers in ymm3/m256, and store the quadword results in ymm1. + + + VPMULUDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 F4 /r + + AVX512VL + AVX512F + + Multiply packed unsigned doubleword integers in xmm2 by packed unsigned doubleword integers in xmm3/m128/m64bcst, and store the quadword results in xmm1 under writemask k1. + + + VPMULUDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 F4 /r + + AVX512VL + AVX512F + + Multiply packed unsigned doubleword integers in ymm2 by packed unsigned doubleword integers in ymm3/m256/m64bcst, and store the quadword results in ymm1 under writemask k1. + + + VPMULUDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 F4 /r + + AVX512F + + Multiply packed unsigned doubleword integers in zmm2 by packed unsigned doubleword integers in zmm3/m512/m64bcst, and store the quadword results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + POR--Bitwise Logical Or. + + POR + xmm1,xmm2/m128 + 66 0F EB /r + + SSE2 + + Bitwise OR of xmm2/m128 and xmm1. + + + VPOR + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EB /r + + AVX + + Bitwise OR of xmm2/m128 and xmm3. + + + VPOR + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EB /r + + AVX2 + + Bitwise OR of ymm2/m256 and ymm3. + + + VPORD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 EB /r + + AVX512VL + AVX512F + + Bitwise OR of packed doubleword integers in xmm2 and xmm3/m128/m32bcst using writemask k1. + + + VPORD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 EB /r + + AVX512VL + AVX512F + + Bitwise OR of packed doubleword integers in ymm2 and ymm3/m256/m32bcst using writemask k1. + + + VPORD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 EB /r + + AVX512F + + Bitwise OR of packed doubleword integers in zmm2 and zmm3/m512/m32bcst using writemask k1. + + + VPORQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 EB /r + + AVX512VL + AVX512F + + Bitwise OR of packed quadword integers in xmm2 and xmm3/m128/m64bcst using writemask k1. + + + VPORQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 EB /r + + AVX512VL + AVX512F + + Bitwise OR of packed quadword integers in ymm2 and ymm3/m256/m64bcst using writemask k1. + + + VPORQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 EB /r + + AVX512F + + Bitwise OR of packed quadword integers in zmm2 and zmm3/m512/m64bcst using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PROLD/PROLVD/PROLQ/PROLVQ--Bit Rotate Left. + + VPROLVD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 15 /r + + AVX512VL + AVX512F + + Rotate doublewords in xmm2 left by count in the corresponding element of xmm3/m128/m32bcst. 
Result written to xmm1 under writemask k1. + + + VPROLD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.NDD.128.66.0F.W0 72 /1 ib + + AVX512VL + AVX512F + + Rotate doublewords in xmm2/m128/m32bcst left by imm8. Result written to xmm1 using writemask k1. + + + VPROLVQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 15 /r + + AVX512VL + AVX512F + + Rotate quadwords in xmm2 left by count in the corresponding element of xmm3/m128/m64bcst. Result written to xmm1 under writemask k1. + + + VPROLQ + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.NDD.128.66.0F.W1 72 /1 ib + + AVX512VL + AVX512F + + Rotate quadwords in xmm2/m128/m64bcst left by imm8. Result written to xmm1 using writemask k1. + + + VPROLVD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 15 /r + + AVX512VL + AVX512F + + Rotate doublewords in ymm2 left by count in the corresponding element of ymm3/m256/m32bcst. Result written to ymm1 under writemask k1. + + + VPROLD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.NDD.256.66.0F.W0 72 /1 ib + + AVX512VL + AVX512F + + Rotate doublewords in ymm2/m256/m32bcst left by imm8. Result written to ymm1 using writemask k1. + + + VPROLVQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 15 /r + + AVX512VL + AVX512F + + Rotate quadwords in ymm2 left by count in the corresponding element of ymm3/m256/m64bcst. Result written to ymm1 under writemask k1. + + + VPROLQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.NDD.256.66.0F.W1 72 /1 ib + + AVX512VL + AVX512F + + Rotate quadwords in ymm2/m256/m64bcst left by imm8. Result written to ymm1 using writemask k1. + + + VPROLVD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 15 /r + + AVX512F + + Rotate left of doublewords in zmm2 by count in the corresponding element of zmm3/m512/m32bcst. Result written to zmm1 using writemask k1. + + + VPROLD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.NDD.512.66.0F.W0 72 /1 ib + + AVX512F + + Rotate left of doublewords in zmm3/m512/m32bcst by imm8. Result written to zmm1 using writemask k1. + + + VPROLVQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 15 /r + + AVX512F + + Rotate quadwords in zmm2 left by count in the corresponding element of zmm3/m512/m64bcst. Result written to zmm1under writemask k1. + + + VPROLQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.NDD.512.66.0F.W1 72 /1 ib + + AVX512F + + Rotate quadwords in zmm2/m512/m64bcst left by imm8. Result written to zmm1 using writemask k1. + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PRORD/PRORVD/PRORQ/PRORVQ--Bit Rotate Right. + + VPRORVD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 14 /r + + AVX512VL + AVX512F + + Rotate doublewords in xmm2 right by count in the corresponding element of xmm3/m128/m32bcst, store result using writemask k1. + + + VPRORD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.NDD.128.66.0F.W0 72 /0 ib + + AVX512VL + AVX512F + + Rotate doublewords in xmm2/m128/m32bcst right by imm8, store result using writemask k1. + + + VPRORVQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 14 /r + + AVX512VL + AVX512F + + Rotate quadwords in xmm2 right by count in the corresponding element of xmm3/m128/m64bcst, store result using writemask k1. + + + VPRORQ + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.NDD.128.66.0F.W1 72 /0 ib + + AVX512VL + AVX512F + + Rotate quadwords in xmm2/m128/m64bcst right by imm8, store result using writemask k1. 
+ + + VPRORVD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 14 /r + + AVX512VL + AVX512F + + Rotate doublewords in ymm2 right by count in the corresponding element of ymm3/m256/m32bcst, store using result writemask k1. + + + VPRORD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.NDD.256.66.0F.W0 72 /0 ib + + AVX512VL + AVX512F + + Rotate doublewords in ymm2/m256/m32bcst right by imm8, store result using writemask k1. + + + VPRORVQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 14 /r + + AVX512VL + AVX512F + + Rotate quadwords in ymm2 right by count in the corresponding element of ymm3/m256/m64bcst, store result using writemask k1. + + + VPRORQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.NDD.256.66.0F.W1 72 /0 ib + + AVX512VL + AVX512F + + Rotate quadwords in ymm2/m256/m64bcst right by imm8, store result using writemask k1. + + + VPRORVD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 14 /r + + AVX512F + + Rotate doublewords in zmm2 right by count in the corresponding element of zmm3/m512/m32bcst, store result using writemask k1. + + + VPRORD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.NDD.512.66.0F.W0 72 /0 ib + + AVX512F + + Rotate doublewords in zmm2/m512/m32bcst right by imm8, store result using writemask k1. + + + VPRORVQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 14 /r + + AVX512F + + Rotate quadwords in zmm2 right by count in the corresponding element of zmm3/m512/m64bcst, store result using writemask k1. + + + VPRORQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.NDD.512.66.0F.W1 72 /0 ib + + AVX512F + + Rotate quadwords in zmm2/m512/m64bcst right by imm8, store result using writemask k1. + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPSCATTERDD/VPSCATTERDQ/VPSCATTERQD/VPSCATTERQQ--Scatter Packed Dword, Packed Qword with Signed Dword, Signed Qword Indices. + + VPSCATTERDD + vm32x {k1},xmm1 + EVEX.128.66.0F38.W0 A0 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter dword values to memory using writemask k1. + + + VPSCATTERDD + vm32y {k1},ymm1 + EVEX.256.66.0F38.W0 A0 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter dword values to memory using writemask k1. + + + VPSCATTERDD + vm32z {k1},zmm1 + EVEX.512.66.0F38.W0 A0 /vsib + + AVX512F + + Using signed dword indices, scatter dword values to memory using writemask k1. + + + VPSCATTERDQ + vm32x {k1},xmm1 + EVEX.128.66.0F38.W1 A0 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter qword values to memory using writemask k1. + + + VPSCATTERDQ + vm32x {k1},ymm1 + EVEX.256.66.0F38.W1 A0 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter qword values to memory using writemask k1. + + + VPSCATTERDQ + vm32y {k1},zmm1 + EVEX.512.66.0F38.W1 A0 /vsib + + AVX512F + + Using signed dword indices, scatter qword values to memory using writemask k1. + + + VPSCATTERQD + vm64x {k1},xmm1 + EVEX.128.66.0F38.W0 A1 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter dword values to memory using writemask k1. + + + VPSCATTERQD + vm64y {k1},xmm1 + EVEX.256.66.0F38.W0 A1 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter dword values to memory using writemask k1. + + + VPSCATTERQD + vm64z {k1},ymm1 + EVEX.512.66.0F38.W0 A1 /vsib + + AVX512F + + Using signed qword indices, scatter dword values to memory using writemask k1. 
+ + + VPSCATTERQQ + vm64x {k1},xmm1 + EVEX.128.66.0F38.W1 A1 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter qword values to memory using writemask k1. + + + VPSCATTERQQ + vm64y {k1},ymm1 + EVEX.256.66.0F38.W1 A1 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter qword values to memory using writemask k1. + + + VPSCATTERQQ + vm64z {k1},zmm1 + EVEX.512.66.0F38.W1 A1 /vsib + + AVX512F + + Using signed qword indices, scatter qword values to memory using writemask k1. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + ModRM:reg(r) + NA + NA + + + + PSHUFB--Packed Shuffle Bytes. + + PSHUFB + xmm1,xmm2/m128 + 66 0F 38 00 /r + + SSSE3 + + Shuffle bytes in xmm1 according to contents of xmm2/m128. + + + VPSHUFB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 00 /r + + AVX + + Shuffle bytes in xmm2 according to contents of xmm3/m128. + + + VPSHUFB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 00 /r + + AVX2 + + Shuffle bytes in ymm2 according to contents of ymm3/m256. + + + VPSHUFB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 00 /r + + AVX512VL + AVX512BW + + Shuffle bytes in xmm2 according to contents of xmm3/m128 under write mask k1. + + + VPSHUFB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 00 /r + + AVX512VL + AVX512BW + + Shuffle bytes in ymm2 according to contents of ymm3/m256 under write mask k1. + + + VPSHUFB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 00 /r + + AVX512BW + + Shuffle bytes in zmm2 according to contents of zmm3/m512 under write mask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSHUFHW--Shuffle Packed High Words. + + PSHUFHW + xmm1,xmm2/m128,imm8 + F3 0F 70 /r ib + + SSE2 + + Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFHW + xmm1,xmm2/m128,imm8 + VEX.128.F3.0F 70 /r ib + + AVX + + Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFHW + ymm1,ymm2/m256,imm8 + VEX.256.F3.0F 70 /r ib + + AVX2 + + Shuffle the high words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1. + + + VPSHUFHW + xmm1 {k1}{z},xmm2/m128,imm8 + EVEX.128.F3.0F.WIG 70 /r ib + + AVX512VL + AVX512BW + + Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1 under write mask k1. + + + VPSHUFHW + ymm1 {k1}{z},ymm2/m256,imm8 + EVEX.256.F3.0F.WIG 70 /r ib + + AVX512VL + AVX512BW + + Shuffle the high words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1 under write mask k1. + + + VPSHUFHW + zmm1 {k1}{z},zmm2/m512,imm8 + EVEX.512.F3.0F.WIG 70 /r ib + + AVX512BW + + Shuffle the high words in zmm2/m512 based on the encoding in imm8 and store the result in zmm1 under write mask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + PSHUFLW--Shuffle Packed Low Words. + + PSHUFLW + xmm1,xmm2/m128,imm8 + F2 0F 70 /r ib + + SSE2 + + Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFLW + xmm1,xmm2/m128,imm8 + VEX.128.F2.0F 70 /r ib + + AVX + + Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFLW + ymm1,ymm2/m256,imm8 + VEX.256.F2.0F 70 /r ib + + AVX2 + + Shuffle the low words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1. 
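The opcode strings in these entries end with one of a few operand markers: /r (ModRM selects a register operand, as in the shuffle and multiply forms), /0 through /7 plus ib (ModRM.reg acts as an opcode extension followed by an immediate byte, as in the VPROLD/VPRORD rotate-by-immediate forms above), or /vsib (a VSIB-encoded memory operand, as in the VPSCATTER forms above). Below is a hedged Python sketch that classifies the marker; the ModRMKind enum and the classify_modrm name are assumptions for illustration only and do not mirror the generator shipped in this patch.

import re
from enum import Enum

class ModRMKind(Enum):
    REG = "reg"      # "/r": ModRM.reg encodes a register operand
    DIGIT = "digit"  # "/0".."/7": ModRM.reg is an opcode extension
    VSIB = "vsib"    # "/vsib": memory operand uses a vector SIB index
    NONE = "none"    # no ModRM marker found

MARKER_RE = re.compile(r"/(vsib|r|[0-7])\b")

def classify_modrm(opcode_text: str):
    """Return (ModRM usage, whether an imm8 byte follows) for an opcode string."""
    m = MARKER_RE.search(opcode_text)
    has_imm8 = " ib" in opcode_text
    if m is None:
        return ModRMKind.NONE, has_imm8
    token = m.group(1)
    if token == "r":
        return ModRMKind.REG, has_imm8
    if token == "vsib":
        return ModRMKind.VSIB, has_imm8
    return ModRMKind.DIGIT, has_imm8

if __name__ == "__main__":
    print(classify_modrm("EVEX.512.66.0F38.W0 A0 /vsib"))    # scatter form
    print(classify_modrm("EVEX.NDD.128.66.0F.W0 72 /1 ib"))  # rotate by imm8
    print(classify_modrm("66 0F 38 3B /r"))                  # PMINUD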
+ + + VPSHUFLW + xmm1 {k1}{z},xmm2/m128,imm8 + EVEX.128.F2.0F.WIG 70 /r ib + + AVX512VL + AVX512BW + + Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1 under write mask k1. + + + VPSHUFLW + ymm1 {k1}{z},ymm2/m256,imm8 + EVEX.256.F2.0F.WIG 70 /r ib + + AVX512VL + AVX512BW + + Shuffle the low words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1 under write mask k1. + + + VPSHUFLW + zmm1 {k1}{z},zmm2/m512,imm8 + EVEX.512.F2.0F.WIG 70 /r ib + + AVX512BW + + Shuffle the low words in zmm2/m512 based on the encoding in imm8 and store the result in zmm1 under write mask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + PSHUFD--Shuffle Packed Doublewords. + + PSHUFD + xmm1,xmm2/m128,imm8 + 66 0F 70 /r ib + + SSE2 + + Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFD + xmm1,xmm2/m128,imm8 + VEX.128.66.0F.WIG 70 /r ib + + AVX + + Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFD + ymm1,ymm2/m256,imm8 + VEX.256.66.0F.WIG 70 /r ib + + AVX2 + + Shuffle the doublewords in ymm2/m256 based on the encoding in imm8 and store the result in ymm1. + + + VPSHUFD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F.W0 70 /r ib + + AVX512VL + AVX512F + + Shuffle the doublewords in xmm2/m128/m32bcst based on the encoding in imm8 and store the result in xmm1 using writemask k1. + + + VPSHUFD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F.W0 70 /r ib + + AVX512VL + AVX512F + + Shuffle the doublewords in ymm2/m256/m32bcst based on the encoding in imm8 and store the result in ymm1 using writemask k1. + + + VPSHUFD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.512.66.0F.W0 70 /r ib + + AVX512F + + Shuffle the doublewords in zmm2/m512/m32bcst based on the encoding in imm8 and store the result in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + PSLLDQ--Byte Shift Left. + + PSLLDQ + xmm1,imm8 + 66 0F 73 /7 ib + + SSE2 + + Shift xmm1 left by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSLLDQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F 73 /7 ib + + AVX + + Shift xmm2 left by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSLLDQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F 73 /7 ib + + AVX2 + + Shift ymm2 left by imm8 bytes while shifting in 0s and store result in ymm1. + + + VPSLLDQ + xmm1,xmm2/ m128,imm8 + EVEX.NDD.128.66.0F 73 /7 ib + + AVX512VL + AVX512BW + + Shift xmm2/m128 left by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSLLDQ + ymm1,ymm2/m256,imm8 + EVEX.NDD.256.66.0F 73 /7 ib + + AVX512VL + AVX512BW + + Shift ymm2/m256 left by imm8 bytes while shifting in 0s and store result in ymm1. + + + VPSLLDQ + zmm1,zmm2/m512,imm8 + EVEX.NDD.512.66.0F 73 /7 ib + + AVX512BW + + Shift zmm2/m512 left by imm8 bytes while shifting in 0s and store result in zmm1. + + + ModRM:r/m(r,w) + Imm8 + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + + PSLLW/PSLLD/PSLLQ--Bit Shift Left. + + PSLLW + xmm1,xmm2/m128 + 66 0F F1/r + + SSE2 + + Shift words in xmm1 left by amount specified in xmm2/m128 while shifting in 0s. + + + PSLLW + xmm1,imm8 + 66 0F 71 /6 ib + + SSE2 + + Shift words in xmm1 left by imm8 while shifting in 0s. 
+ + + PSLLD + xmm1,imm8 + 66 0F 72 /6 ib + + SSE2 + + Shift doublewords in xmm1 left by imm8 while shifting in 0s. + + + PSLLQ + xmm1,imm8 + 66 0F 73 /6 ib + + SSE2 + + Shift quadwords in xmm1 left by imm8 while shifting in 0s. + + + VPSLLW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F1 /r + + AVX + + Shift words in xmm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLW + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 71 /6 ib + + AVX + + Shift words in xmm2 left by imm8 while shifting in 0s. + + + VPSLLD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F2 /r + + AVX + + Shift doublewords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLD + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 72 /6 ib + + AVX + + Shift doublewords in xmm2 left by imm8 while shifting in 0s. + + + VPSLLQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F3 /r + + AVX + + Shift quadwords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 73 /6 ib + + AVX + + Shift quadwords in xmm2 left by imm8 while shifting in 0s. + + + VPSLLW + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG F1 /r + + AVX2 + + Shift words in ymm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLW + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 71 /6 ib + + AVX2 + + Shift words in ymm2 left by imm8 while shifting in 0s. + + + VPSLLD + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG F2 /r + + AVX2 + + Shift doublewords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLD + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 72 /6 ib + + AVX2 + + Shift doublewords in ymm2 left by imm8 while shifting in 0s. + + + VPSLLQ + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG F3 /r + + AVX2 + + Shift quadwords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 73 /6 ib + + AVX2 + + Shift quadwords in ymm2 left by imm8 while shifting in 0s. + + + VPSLLW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG F1 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLW + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.WIG F1 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLW + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.WIG F1 /r + + AVX512BW + + Shift words in zmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLW + xmm1 {k1}{z},xmm2/m128,imm8 + EVEX.NDD.128.66.0F.WIG 71 /6 ib + + AVX512VL + AVX512BW + + Shift words in xmm2/m128 left by imm8 while shifting in 0s using writemask k1. + + + VPSLLW + ymm1 {k1}{z},ymm2/m256,imm8 + EVEX.NDD.256.66.0F.WIG 71 /6 ib + + AVX512VL + AVX512BW + + Shift words in ymm2/m256 left by imm8 while shifting in 0s using writemask k1. + + + VPSLLW + zmm1 {k1}{z},zmm2/m512,imm8 + EVEX.NDD.512.66.0F.WIG 71 /6 ib + + AVX512BW + + Shift words in zmm2/m512 left by imm8 while shifting in 0 using writemask k1. + + + VPSLLD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W0 F2 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s under writemask k1. + + + VPSLLD + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W0 F2 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s under writemask k1. 
+ + + VPSLLD + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W0 F2 /r + + AVX512F + + Shift doublewords in zmm2 left by amount specified in xmm3/m128 while shifting in 0s under writemask k1. + + + VPSLLD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.NDD.128.66.0F.W0 72 /6 ib + + AVX512VL + AVX512F + + Shift doublewords in xmm2/m128/m32bcst left by imm8 while shifting in 0s using writemask k1. + + + VPSLLD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.NDD.256.66.0F.W0 72 /6 ib + + AVX512VL + AVX512F + + Shift doublewords in ymm2/m256/m32bcst left by imm8 while shifting in 0s using writemask k1. + + + VPSLLD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.NDD.512.66.0F.W0 72 /6 ib + + AVX512F + + Shift doublewords in zmm2/m512/m32bcst left by imm8 while shifting in 0s using writemask k1. + + + VPSLLQ + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W1 F3 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLQ + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W1 F3 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLQ + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W1 F3 /r + + AVX512F + + Shift quadwords in zmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLQ + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.NDD.128.66.0F.W1 73 /6 ib + + AVX512VL + AVX512F + + Shift quadwords in xmm2/m128/m64bcst left by imm8 while shifting in 0s using writemask k1. + + + VPSLLQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.NDD.256.66.0F.W1 73 /6 ib + + AVX512VL + AVX512F + + Shift quadwords in ymm2/m256/m64bcst left by imm8 while shifting in 0s using writemask k1. + + + VPSLLQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.NDD.512.66.0F.W1 73 /6 ib + + AVX512F + + Shift quadwords in zmm2/m512/m64bcst left by imm8 while shifting in 0s using writemask k1. + + + ModRM:r/m(r,w) + Imm8 + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSRAW/PSRAD/PSRAQ--Bit Shift Arithmetic Right. + + PSRAW + xmm1,xmm2/m128 + 66 0F E1/r + + SSE2 + + Shift words in xmm1 right by amount specified in xmm2/m128 while shifting in sign bits. + + + PSRAW + xmm1,imm8 + 66 0F 71 /4 ib + + SSE2 + + Shift words in xmm1 right by imm8 while shifting in sign bits. + + + PSRAD + xmm1,xmm2/m128 + 66 0F E2 /r + + SSE2 + + Shift doublewords in xmm1 right by amount specified in xmm2/m128 while shifting in sign bits. + + + PSRAD + xmm1,imm8 + 66 0F 72 /4 ib + + SSE2 + + Shift doublewords in xmm1 right by imm8 while shifting in sign bits. + + + VPSRAW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E1 /r + + AVX + + Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits. + + + VPSRAW + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 71 /4 ib + + AVX + + Shift words in xmm2 right by imm8 while shifting in sign bits. + + + VPSRAD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E2 /r + + AVX + + Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits. + + + VPSRAD + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 72 /4 ib + + AVX + + Shift doublewords in xmm2 right by imm8 while shifting in sign bits. 
+ + + VPSRAW + ymm1,ymm2,ymm3/m128 + VEX.NDS.256.66.0F.WIG E1 /r + + AVX2 + + Shift words in ymm2 right by amount specified in ymm3/m128 while shifting in sign bits. + + + VPSRAW + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 71 /4 ib + + AVX2 + + Shift words in ymm2 right by imm8 while shifting in sign bits. + + + VPSRAD + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG E2 /r + + AVX2 + + Shift doublewords in ymm2 right by amount specified in ymm3/m128 while shifting in sign bits. + + + VPSRAD + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 72 /4 ib + + AVX2 + + Shift doublewords in ymm2 right by imm8 while shifting in sign bits. + + + VPSRAW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E1 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAW + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.WIG E1 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAW + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.WIG E1 /r + + AVX512BW + + Shift words in zmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAW + xmm1 {k1}{z},xmm2/m128,imm8 + EVEX.NDD.128.66.0F.WIG 71 /4 ib + + AVX512VL + AVX512BW + + Shift words in xmm2/m128 right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAW + ymm1 {k1}{z},ymm2/m256,imm8 + EVEX.NDD.256.66.0F.WIG 71 /4 ib + + AVX512VL + AVX512BW + + Shift words in ymm2/m256 right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAW + zmm1 {k1}{z},zmm2/m512,imm8 + EVEX.NDD.512.66.0F.WIG 71 /4 ib + + AVX512BW + + Shift words in zmm2/m512 right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W0 E2 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAD + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W0 E2 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAD + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W0 E2 /r + + AVX512F + + Shift doublewords in zmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.NDD.128.66.0F.W0 72 /4 ib + + AVX512VL + AVX512F + + Shift doublewords in xmm2/m128/m32bcst right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.NDD.256.66.0F.W0 72 /4 ib + + AVX512VL + AVX512F + + Shift doublewords in ymm2/m256/m32bcst right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.NDD.512.66.0F.W0 72 /4 ib + + AVX512F + + Shift doublewords in zmm2/m512/m32bcst right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAQ + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W1 E2 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAQ + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W1 E2 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. 
+ + + VPSRAQ + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W1 E2 /r + + AVX512F + + Shift quadwords in zmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAQ + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.NDD.128.66.0F.W1 72 /4 ib + + AVX512VL + AVX512F + + Shift quadwords in xmm2/m128/m64bcst right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.NDD.256.66.0F.W1 72 /4 ib + + AVX512VL + AVX512F + + Shift quadwords in ymm2/m256/m64bcst right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.NDD.512.66.0F.W1 72 /4 ib + + AVX512F + + Shift quadwords in zmm2/m512/m64bcst right by imm8 while shifting in sign bits using writemask k1. + + + ModRM:r/m(r,w) + Imm8 + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSRLDQ--Byte Shift Right. + + PSRLDQ + xmm1,imm8 + 66 0F 73 /3 ib + + SSE2 + + Shift xmm1 right by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSRLDQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F 73 /3 ib + + AVX + + Shift xmm2 right by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSRLDQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F 73 /3 ib + + AVX2 + + Shift ymm2 right by imm8 bytes while shifting in 0s and store result in ymm1. + + + VPSRLDQ + xmm1,xmm2/m128,imm8 + EVEX.NDD.128.66.0F.WIG 73 /3 ib + + AVX512VL + AVX512BW + + Shift xmm2/m128 right by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSRLDQ + ymm1,ymm2/m256,imm8 + EVEX.NDD.256.66.0F.WIG 73 /3 ib + + AVX512VL + AVX512BW + + Shift ymm2/m256 right by imm8 bytes while shifting in 0s and store result in ymm1. + + + VPSRLDQ + zmm1,zmm2/m512,imm8 + EVEX.NDD.512.66.0F.WIG 73 /3 ib + + AVX512BW + + Shift zmm2/m512 right by imm8 bytes while shifting in 0s and store result in zmm1. + + + ModRM:r/m(r,w) + Imm8 + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + + PSRLW/PSRLD/PSRLQ--Shift Packed Data Right Logical. + + PSRLW + xmm1,xmm2/m128 + 66 0F D1 /r + + SSE2 + + Shift words in xmm1 right by amount specified in xmm2/m128 while shifting in 0s. + + + PSRLW + xmm1,imm8 + 66 0F 71 /2 ib + + SSE2 + + Shift words in xmm1 right by imm8 while shifting in 0s. + + + PSRLD + xmm1,xmm2/m128 + 66 0F D2 /r + + SSE2 + + Shift doublewords in xmm1 right by amount specified in xmm2/m128 while shifting in 0s. + + + PSRLD + xmm1,imm8 + 66 0F 72 /2 ib + + SSE2 + + Shift doublewords in xmm1 right by imm8 while shifting in 0s. + + + PSRLQ + xmm1,xmm2/m128 + 66 0F D3 /r + + SSE2 + + Shift quadwords in xmm1 right by amount specified in xmm2/m128 while shifting in 0s. + + + PSRLQ + xmm1,imm8 + 66 0F 73 /2 ib + + SSE2 + + Shift quadwords in xmm1 right by imm8 while shifting in 0s. + + + VPSRLW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D1 /r + + AVX + + Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLW + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 71 /2 ib + + AVX + + Shift words in xmm2 right by imm8 while shifting in 0s. + + + VPSRLD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D2 /r + + AVX + + Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s. 
+ + + VPSRLD + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 72 /2 ib + + AVX + + Shift doublewords in xmm2 right by imm8 while shifting in 0s. + + + VPSRLQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D3 /r + + AVX + + Shift quadwords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 73 /2 ib + + AVX + + Shift quadwords in xmm2 right by imm8 while shifting in 0s. + + + VPSRLW + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG D1 /r + + AVX2 + + Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLW + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 71 /2 ib + + AVX2 + + Shift words in ymm2 right by imm8 while shifting in 0s. + + + VPSRLD + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG D2 /r + + AVX2 + + Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLD + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 72 /2 ib + + AVX2 + + Shift doublewords in ymm2 right by imm8 while shifting in 0s. + + + VPSRLQ + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG D3 /r + + AVX2 + + Shift quadwords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 73 /2 ib + + AVX2 + + Shift quadwords in ymm2 right by imm8 while shifting in 0s. + + + VPSRLW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG D1 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLW + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.WIG D1 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLW + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.WIG D1 /r + + AVX512BW + + Shift words in zmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLW + xmm1 {k1}{z},xmm2/m128,imm8 + EVEX.NDD.128.66.0F.WIG 71 /2 ib + + AVX512VL + AVX512BW + + Shift words in xmm2/m128 right by imm8 while shifting in 0s using writemask k1. + + + VPSRLW + ymm1 {k1}{z},ymm2/m256,imm8 + EVEX.NDD.256.66.0F.WIG 71 /2 ib + + AVX512VL + AVX512BW + + Shift words in ymm2/m256 right by imm8 while shifting in 0s using writemask k1. + + + VPSRLW + zmm1 {k1}{z},zmm2/m512,imm8 + EVEX.NDD.512.66.0F.WIG 71 /2 ib + + AVX512BW + + Shift words in zmm2/m512 right by imm8 while shifting in 0s using writemask k1. + + + VPSRLD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W0 D2 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLD + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W0 D2 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLD + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W0 D2 /r + + AVX512F + + Shift doublewords in zmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.NDD.128.66.0F.W0 72 /2 ib + + AVX512VL + AVX512F + + Shift doublewords in xmm2/m128/m32bcst right by imm8 while shifting in 0s using writemask k1. + + + VPSRLD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.NDD.256.66.0F.W0 72 /2 ib + + AVX512VL + AVX512F + + Shift doublewords in ymm2/m256/m32bcst right by imm8 while shifting in 0s using writemask k1. 
+ + + VPSRLD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.NDD.512.66.0F.W0 72 /2 ib + + AVX512F + + Shift doublewords in zmm2/m512/m32bcst right by imm8 while shifting in 0s using writemask k1. + + + VPSRLQ + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W1 D3 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLQ + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W1 D3 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLQ + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W1 D3 /r + + AVX512F + + Shift quadwords in zmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLQ + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.NDD.128.66.0F.W1 73 /2 ib + + AVX512VL + AVX512F + + Shift quadwords in xmm2/m128/m64bcst right by imm8 while shifting in 0s using writemask k1. + + + VPSRLQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.NDD.256.66.0F.W1 73 /2 ib + + AVX512VL + AVX512F + + Shift quadwords in ymm2/m256/m64bcst right by imm8 while shifting in 0s using writemask k1. + + + VPSRLQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.NDD.512.66.0F.W1 73 /2 ib + + AVX512F + + Shift quadwords in zmm2/m512/m64bcst right by imm8 while shifting in 0s using writemask k1. + + + ModRM:r/m(r,w) + Imm8 + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPSLLVW/VPSLLVD/VPSLLVQ--Variable Bit Shift Left Logical. + + VPSLLVD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 47 /r + + AVX2 + + Shift doublewords in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSLLVQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 47 /r + + AVX2 + + Shift quadwords in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSLLVD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 47 /r + + AVX2 + + Shift doublewords in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + VPSLLVQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 47 /r + + AVX2 + + Shift quadwords in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + VPSLLVW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 12 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLVW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 12 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s using writemask k1. + + + VPSLLVW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 12 /r + + AVX512BW + + Shift words in zmm2 left by amount specified in the corresponding element of zmm3/m512 while shifting in 0s using writemask k1. + + + VPSLLVD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 47 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 left by amount specified in the corresponding element of xmm3/m128/m32bcst while shifting in 0s using writemask k1. 
+ + + VPSLLVD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 47 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 left by amount specified in the corresponding element of ymm3/m256/m32bcst while shifting in 0s using writemask k1. + + + VPSLLVD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 47 /r + + AVX512F + + Shift doublewords in zmm2 left by amount specified in the corresponding element of zmm3/m512/m32bcst while shifting in 0s using writemask k1. + + + VPSLLVQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 47 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 left by amount specified in the corresponding element of xmm3/m128/m64bcst while shifting in 0s using writemask k1. + + + VPSLLVQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 47 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 left by amount specified in the corresponding element of ymm3/m256/m64bcst while shifting in 0s using writemask k1. + + + VPSLLVQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 47 /r + + AVX512F + + Shift quadwords in zmm2 left by amount specified in the corresponding element of zmm3/m512/m64bcst while shifting in 0s using writemask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPSRLVW/VPSRLVD/VPSRLVQ--Variable Bit Shift Right Logical. + + VPSRLVD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 45 /r + + AVX2 + + Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSRLVQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 45 /r + + AVX2 + + Shift quadwords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSRLVD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 45 /r + + AVX2 + + Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + VPSRLVQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 45 /r + + AVX2 + + Shift quadwords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + VPSRLVW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 10 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLVW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 10 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s using writemask k1. + + + VPSRLVW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 10 /r + + AVX512BW + + Shift words in zmm2 right by amount specified in the corresponding element of zmm3/m512 while shifting in 0s using writemask k1. + + + VPSRLVD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 45 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m32bcst while shifting in 0s using writemask k1. + + + VPSRLVD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 45 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m32bcst while shifting in 0s using writemask k1. 
+ + + VPSRLVD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 45 /r + + AVX512F + + Shift doublewords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m32bcst while shifting in 0s using writemask k1. + + + VPSRLVQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 45 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m64bcst while shifting in 0s using writemask k1. + + + VPSRLVQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 45 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m64bcst while shifting in 0s using writemask k1. + + + VPSRLVQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 45 /r + + AVX512F + + Shift quadwords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m64bcst while shifting in 0s using writemask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSUBB/PSUBW/PSUBD/PSUBQ--Packed Integer Subtract. + + PSUBB + xmm1,xmm2/m128 + 66 0F F8 /r + + SSE2 + + Subtract packed byte integers in xmm2/m128 from xmm1. + + + PSUBW + xmm1,xmm2/m128 + 66 0F F9 /r + + SSE2 + + Subtract packed word integers in xmm2/m128 from xmm1. + + + PSUBD + xmm1,xmm2/m128 + 66 0F FA /r + + SSE2 + + Subtract packed doubleword integers in xmm2/m128 from xmm1. + + + PSUBQ + xmm1,xmm2/m128 + 66 0F FB/r + + SSE2 + + Subtract packed quadword integers in xmm2/m128 from xmm1. + + + VPSUBB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F8 /r + + AVX + + Subtract packed byte integers in xmm3/m128 from xmm2. + + + VPSUBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F9 /r + + AVX + + Subtract packed word integers in xmm3/m128 from xmm2. + + + VPSUBD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FA /r + + AVX + + Subtract packed doubleword integers in xmm3/m128 from xmm2. + + + VPSUBQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FB/r + + AVX + + Subtract packed quadword integers in xmm3/m128 from xmm2. + + + VPSUBB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F8 /r + + AVX2 + + Subtract packed byte integers in ymm3/m256 from ymm2. + + + VPSUBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F9 /r + + AVX2 + + Subtract packed word integers in ymm3/m256 from ymm2. + + + VPSUBD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FA /r + + AVX2 + + Subtract packed doubleword integers in ymm3/m256 from ymm2. + + + VPSUBQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FB/r + + AVX2 + + Subtract packed quadword integers in ymm3/m256 from ymm2. + + + VPSUBB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG F8 /r + + AVX512VL + AVX512BW + + Subtract packed byte integers in xmm3/m128 from xmm2 and store in xmm1 using writemask k1. + + + VPSUBB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG F8 /r + + AVX512VL + AVX512BW + + Subtract packed byte integers in ymm3/m256 from ymm2 and store in ymm1 using writemask k1. + + + VPSUBB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG F8 /r + + AVX512BW + + Subtract packed byte integers in zmm3/m512 from zmm2 and store in zmm1 using writemask k1. + + + VPSUBW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG F9 /r + + AVX512VL + AVX512BW + + Subtract packed word integers in xmm3/m128 from xmm2 and store in xmm1 using writemask k1. 
+ + + VPSUBW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG F9 /r + + AVX512VL + AVX512BW + + Subtract packed word integers in ymm3/m256 from ymm2 and store in ymm1 using writemask k1. + + + VPSUBW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG F9 /r + + AVX512BW + + Subtract packed word integers in zmm3/m512 from zmm2 and store in zmm1 using writemask k1. + + + VPSUBD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 FA /r + + AVX512VL + AVX512F + + Subtract packed doubleword integers in xmm3/m128/m32bcst from xmm2 and store in xmm1 using writemask k1. + + + VPSUBD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 FA /r + + AVX512VL + AVX512F + + Subtract packed doubleword integers in ymm3/m256/m32bcst from ymm2 and store in ymm1 using writemask k1. + + + VPSUBD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 FA /r + + AVX512F + + Subtract packed doubleword integers in zmm3/m512/m32bcst from zmm2 and store in zmm1 using writemask k1. + + + VPSUBQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 FB /r + + AVX512VL + AVX512F + + Subtract packed quadword integers in xmm3/m128/m64bcst from xmm2 and store in xmm1 using writemask k1. + + + VPSUBQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 FB /r + + AVX512VL + AVX512F + + Subtract packed quadword integers in ymm3/m256/m64bcst from ymm2 and store in ymm1 using writemask k1. + + + VPSUBQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 FB/r + + AVX512F + + Subtract packed quadword integers in zmm3/m512/m64bcst from zmm2 and store in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSUBSB/PSUBSW--Subtract Packed Signed Integers with Signed Saturation. + + PSUBSB + xmm1,xmm2/m128 + 66 0F E8 /r + + SSE2 + + Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1 and saturate results. + + + PSUBSW + xmm1,xmm2/m128 + 66 0F E9 /r + + SSE2 + + Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1 and saturate results. + + + VPSUBSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E8 /r + + AVX + + Subtract packed signed byte integers in xmm3/m128 from packed signed byte integers in xmm2 and saturate results. + + + VPSUBSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E9 /r + + AVX + + Subtract packed signed word integers in xmm3/m128 from packed signed word integers in xmm2 and saturate results. + + + VPSUBSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E8 /r + + AVX2 + + Subtract packed signed byte integers in ymm3/m256 from packed signed byte integers in ymm2 and saturate results. + + + VPSUBSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E9 /r + + AVX2 + + Subtract packed signed word integers in ymm3/m256 from packed signed word integers in ymm2 and saturate results. + + + VPSUBSB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E8 /r + + AVX512VL + AVX512BW + + Subtract packed signed byte integers in xmm3/m128 from packed signed byte integers in xmm2 and saturate results and store in xmm1 using writemask k1. + + + VPSUBSB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E8 /r + + AVX512VL + AVX512BW + + Subtract packed signed byte integers in ymm3/m256 from packed signed byte integers in ymm2 and saturate results and store in ymm1 using writemask k1. 
+ + + VPSUBSB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E8 /r + + AVX512BW + + Subtract packed signed byte integers in zmm3/m512 from packed signed byte integers in zmm2 and saturate results and store in zmm1 using writemask k1. + + + VPSUBSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E9 /r + + AVX512VL + AVX512BW + + Subtract packed signed word integers in xmm3/m128 from packed signed word integers in xmm2 and saturate results and store in xmm1 using writemask k1. + + + VPSUBSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E9 /r + + AVX512VL + AVX512BW + + Subtract packed signed word integers in ymm3/m256 from packed signed word integers in ymm2 and saturate results and store in ymm1 using writemask k1. + + + VPSUBSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E9 /r + + AVX512BW + + Subtract packed signed word integers in zmm3/m512 from packed signed word integers in zmm2 and saturate results and store in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSUBUSB/PSUBUSW--Subtract Packed Unsigned Integers with Unsigned Saturation. + + PSUBUSB + xmm1,xmm2/m128 + 66 0F D8 /r + + SSE2 + + Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1 and saturate result. + + + PSUBUSW + xmm1,xmm2/m128 + 66 0F D9 /r + + SSE2 + + Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1 and saturate result. + + + VPSUBUSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F D8 /r + + AVX + + Subtract packed unsigned byte integers in xmm3/m128 from packed unsigned byte integers in xmm2 and saturate result. + + + VPSUBUSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F D9 /r + + AVX + + Subtract packed unsigned word integers in xmm3/m128 from packed unsigned word integers in xmm2 and saturate result. + + + VPSUBUSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F D8 /r + + AVX2 + + Subtract packed unsigned byte integers in ymm3/m256 from packed unsigned byte integers in ymm2 and saturate result. + + + VPSUBUSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F D9 /r + + AVX2 + + Subtract packed unsigned word integers in ymm3/m256 from packed unsigned word integers in ymm2 and saturate result. + + + VPSUBUSB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG D8 /r + + AVX512VL + AVX512BW + + Subtract packed unsigned byte integers in xmm3/m128 from packed unsigned byte integers in xmm2, saturate results and store in xmm1 using writemask k1. + + + VPSUBUSB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG D8 /r + + AVX512VL + AVX512BW + + Subtract packed unsigned byte integers in ymm3/m256 from packed unsigned byte integers in ymm2, saturate results and store in ymm1 using writemask k1. + + + VPSUBUSB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG D8 /r + + AVX512BW + + Subtract packed unsigned byte integers in zmm3/m512 from packed unsigned byte integers in zmm2, saturate results and store in zmm1 using writemask k1. + + + VPSUBUSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG D9 /r + + AVX512VL + AVX512BW + + Subtract packed unsigned word integers in xmm3/m128 from packed unsigned word integers in xmm2 and saturate results and store in xmm1 using writemask k1. 
+ + + VPSUBUSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG D9 /r + + AVX512VL + AVX512BW + + Subtract packed unsigned word integers in ymm3/m256 from packed unsigned word integers in ymm2, saturate results and store in ymm1 using writemask k1. + + + VPSUBUSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG D9 /r + + AVX512BW + + Subtract packed unsigned word integers in zmm3/m512 from packed unsigned word integers in zmm2, saturate results and store in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPTESTNMB/W/D/Q--Logical NAND and Set. + + VPTESTNMB + k2 {k1},xmm2,xmm3/m128 + EVEX.NDS.128.F3.0F38.W0 26 /r + + AVX512VL + AVX512BW + + Bitwise NAND of packed byte integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMB + k2 {k1},ymm2,ymm3/m256 + EVEX.NDS.256.F3.0F38.W0 26 /r + + AVX512VL + AVX512BW + + Bitwise NAND of packed byte integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMB + k2 {k1},zmm2,zmm3/m512 + EVEX.NDS.512.F3.0F38.W0 26 /r + + AVX512F + AVX512BW + + Bitwise NAND of packed byte integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMW + k2 {k1},xmm2,xmm3/m128 + EVEX.NDS.128.F3.0F38.W1 26 /r + + AVX512VL + AVX512BW + + Bitwise NAND of packed word integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMW + k2 {k1},ymm2,ymm3/m256 + EVEX.NDS.256.F3.0F38.W1 26 /r + + AVX512VL + AVX512BW + + Bitwise NAND of packed word integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMW + k2 {k1},zmm2,zmm3/m512 + EVEX.NDS.512.F3.0F38.W1 26 /r + + AVX512F + AVX512BW + + Bitwise NAND of packed word integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMD + k2 {k1},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.F3.0F38.W0 27 /r + + AVX512VL + AVX512F + + Bitwise NAND of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMD + k2 {k1},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.F3.0F38.W0 27 /r + + AVX512VL + AVX512F + + Bitwise NAND of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMD + k2 {k1},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.F3.0F38.W0 27 /r + + AVX512F + + Bitwise NAND of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMQ + k2 {k1},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.F3.0F38.W1 27 /r + + AVX512VL + AVX512F + + Bitwise NAND of packed quadword integers in xmm2 and xmm3/m128/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. 
+ + + VPTESTNMQ + k2 {k1},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.F3.0F38.W1 27 /r + + AVX512VL + AVX512F + + Bitwise NAND of packed quadword integers in ymm2 and ymm3/m256/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMQ + k2 {k1},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.F3.0F38.W1 27 /r + + AVX512F + + Bitwise NAND of packed quadword integers in zmm2 and zmm3/m512/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PUNPCKHBW/PUNPCKHWD/PUNPCKHDQ/PUNPCKHQDQ--Unpack High Data. + + PUNPCKHBW + xmm1,xmm2/m128 + 66 0F 68 /r + + SSE2 + + Interleave high-order bytes from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKHWD + xmm1,xmm2/m128 + 66 0F 69 /r + + SSE2 + + Interleave high-order words from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKHDQ + xmm1,xmm2/m128 + 66 0F 6A /r + + SSE2 + + Interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKHQDQ + xmm1,xmm2/m128 + 66 0F 6D /r + + SSE2 + + Interleave high-order quadword from xmm1 and xmm2/m128 into xmm1 register. + + + VPUNPCKHBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 68 /r + + AVX + + Interleave high-order bytes from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKHWD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 69 /r + + AVX + + Interleave high-order words from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKHDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 6A /r + + AVX + + Interleave high-order doublewords from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKHQDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 6D /r + + AVX + + Interleave high-order quadword from xmm2 and xmm3/m128 into xmm1 register. + + + VPUNPCKHBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 68 /r + + AVX2 + + Interleave high-order bytes from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHWD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 69 /r + + AVX2 + + Interleave high-order words from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 6A /r + + AVX2 + + Interleave high-order doublewords from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHQDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 6D /r + + AVX2 + + Interleave high-order quadword from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHBW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 68 /r + + AVX512VL + AVX512BW + + Interleave high-order bytes from xmm2 and xmm3/m128 into xmm1 register using k1 write mask. + + + VPUNPCKHWD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 69 /r + + AVX512VL + AVX512BW + + Interleave high-order words from xmm2 and xmm3/m128 into xmm1 register using k1 write mask. + + + VPUNPCKHDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 6A /r + + AVX512VL + AVX512F + + Interleave high-order doublewords from xmm2 and xmm3/m128/m32bcst into xmm1 register using k1 write mask. + + + VPUNPCKHQDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 6D /r + + AVX512VL + AVX512F + + Interleave high-order quadword from xmm2 and xmm3/m128/m64bcst into xmm1 register using k1 write mask. + + + VPUNPCKHBW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 68 /r + + AVX512VL + AVX512BW + + Interleave high-order bytes from ymm2 and ymm3/m256 into ymm1 register using k1 write mask. 
+ + + VPUNPCKHWD + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 69 /r + + AVX512VL + AVX512BW + + Interleave high-order words from ymm2 and ymm3/m256 into ymm1 register using k1 write mask. + + + VPUNPCKHDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 6A /r + + AVX512VL + AVX512F + + Interleave high-order doublewords from ymm2 and ymm3/m256/m32bcst into ymm1 register using k1 write mask. + + + VPUNPCKHQDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 6D /r + + AVX512VL + AVX512F + + Interleave high-order quadword from ymm2 and ymm3/m256/m64bcst into ymm1 register using k1 write mask. + + + VPUNPCKHBW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F 68/r + + AVX512BW + + Interleave high-order bytes from zmm2 and zmm3/m512 into zmm1 register. + + + VPUNPCKHWD + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F 69/r + + AVX512BW + + Interleave high-order words from zmm2 and zmm3/m512 into zmm1 register. + + + VPUNPCKHDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 6A /r + + AVX512F + + Interleave high-order doublewords from zmm2 and zmm3/m512/m32bcst into zmm1 register using k1 write mask. + + + VPUNPCKHQDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 6D /r + + AVX512F + + Interleave high-order quadword from zmm2 and zmm3/m512/m64bcst into zmm1 register using k1 write mask. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PUNPCKLBW/PUNPCKLWD/PUNPCKLDQ/PUNPCKLQDQ--Unpack Low Data. + + PUNPCKLBW + xmm1,xmm2/m128 + 66 0F 60 /r + + SSE2 + + Interleave low-order bytes from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKLWD + xmm1,xmm2/m128 + 66 0F 61 /r + + SSE2 + + Interleave low-order words from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKLDQ + xmm1,xmm2/m128 + 66 0F 62 /r + + SSE2 + + Interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKLQDQ + xmm1,xmm2/m128 + 66 0F 6C /r + + SSE2 + + Interleave low-order quadword from xmm1 and xmm2/m128 into xmm1 register. + + + VPUNPCKLBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 60 /r + + AVX + + Interleave low-order bytes from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKLWD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 61 /r + + AVX + + Interleave low-order words from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKLDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 62 /r + + AVX + + Interleave low-order doublewords from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKLQDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 6C /r + + AVX + + Interleave low-order quadword from xmm2 and xmm3/m128 into xmm1 register. + + + VPUNPCKLBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 60 /r + + AVX2 + + Interleave low-order bytes from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKLWD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 61 /r + + AVX2 + + Interleave low-order words from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKLDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 62 /r + + AVX2 + + Interleave low-order doublewords from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKLQDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 6C /r + + AVX2 + + Interleave low-order quadword from ymm2 and ymm3/m256 into ymm1 register. 
+ + + VPUNPCKLBW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 60 /r + + AVX512VL + AVX512BW + + Interleave low-order bytes from xmm2 and xmm3/m128 into xmm1 register subject to write mask k1. + + + VPUNPCKLWD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 61 /r + + AVX512VL + AVX512BW + + Interleave low-order words from xmm2 and xmm3/m128 into xmm1 register subject to write mask k1. + + + VPUNPCKLDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 62 /r + + AVX512VL + AVX512F + + Interleave low-order doublewords from xmm2 and xmm3/m128/m32bcst into xmm1 register subject to write mask k1. + + + VPUNPCKLQDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 6C /r + + AVX512VL + AVX512F + + Interleave low-order quadword from zmm2 and zmm3/m512/m64bcst into zmm1 register subject to write mask k1. + + + VPUNPCKLBW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 60 /r + + AVX512VL + AVX512BW + + Interleave low-order bytes from ymm2 and ymm3/m256 into ymm1 register subject to write mask k1. + + + VPUNPCKLWD + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 61 /r + + AVX512VL + AVX512BW + + Interleave low-order words from ymm2 and ymm3/m256 into ymm1 register subject to write mask k1. + + + VPUNPCKLDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 62 /r + + AVX512VL + AVX512F + + Interleave low-order doublewords from ymm2 and ymm3/m256/m32bcst into ymm1 register subject to write mask k1. + + + VPUNPCKLQDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 6C /r + + AVX512VL + AVX512F + + Interleave low-order quadword from ymm2 and ymm3/m256/m64bcst into ymm1 register subject to write mask k1. + + + VPUNPCKLBW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F 60/r + + AVX512BW + + Interleave low-order bytes from zmm2 and zmm3/m512 into zmm1 register subject to write mask k1. + + + VPUNPCKLWD + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F 61/r + + AVX512BW + + Interleave low-order words from zmm2 and zmm3/m512 into zmm1 register subject to write mask k1. + + + VPUNPCKLDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 62 /r + + AVX512F + + Interleave low-order doublewords from zmm2 and zmm3/m512/m32bcst into zmm1 register subject to write mask k1. + + + VPUNPCKLQDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 6C /r + + AVX512F + + Interleave low-order quadword from zmm2 and zmm3/m512/m64bcst into zmm1 register subject to write mask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2--Shuffle Packed Values at 128-bit Granularity. + + VSHUFF32X4 + ymm1{k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 23 /r ib + + AVX512VL + AVX512F + + Shuffle 128-bit packed single-precision floating-point values selected by imm8 from ymm2 and ymm3/m256/m32bcst and place results in ymm1 subject to writemask k1. + + + VSHUFF32x4 + zmm1{k1}{z},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.66.0F3A.W0 23 /r ib + + AVX512F + + Shuffle 128-bit packed single-precision floating-point values selected by imm8 from zmm2 and zmm3/m512/m32bcst and place results in zmm1 subject to writemask k1. 
+ + + VSHUFF64X2 + ymm1{k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 23 /r ib + + AVX512VL + AVX512F + + Shuffle 128-bit packed double-precision floating-point values selected by imm8 from ymm2 and ymm3/m256/m64bcst and place results in ymm1 subject to writemask k1. + + + VSHUFF64x2 + zmm1{k1}{z},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F3A.W1 23 /r ib + + AVX512F + + Shuffle 128-bit packed double-precision floating-point values selected by imm8 from zmm2 and zmm3/m512/m64bcst and place results in zmm1 subject to writemask k1. + + + VSHUFI32X4 + ymm1{k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 43 /r ib + + AVX512VL + AVX512F + + Shuffle 128-bit packed double-word values selected by imm8 from ymm2 and ymm3/m256/m32bcst and place results in ymm1 subject to writemask k1. + + + VSHUFI32x4 + zmm1{k1}{z},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.66.0F3A.W0 43 /r ib + + AVX512F + + Shuffle 128-bit packed double-word values selected by imm8 from zmm2 and zmm3/m512/m32bcst and place results in zmm1 subject to writemask k1. + + + VSHUFI64X2 + ymm1{k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 43 /r ib + + AVX512VL + AVX512F + + Shuffle 128-bit packed quad-word values selected by imm8 from ymm2 and ymm3/m256/m64bcst and place results in ymm1 subject to writemask k1. + + + VSHUFI64x2 + zmm1{k1}{z},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F3A.W1 43 /r ib + + AVX512F + + Shuffle 128-bit packed quad-word values selected by imm8 from zmm2 and zmm3/m512/m64bcst and place results in zmm1 subject to writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SHUFPD--Packed Interleave Shuffle of Pairs of Double-Precision Floating-Point Values. + + SHUFPD + xmm1,xmm2/m128,imm8 + 66 0F C6 /r ib + + SSE2 + + Shuffle two pairs of double-precision floating-point values from xmm1 and xmm2/m128 using imm8 to select from each pair, interleaved result is stored in xmm1. + + + VSHUFPD + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F.WIG C6 /r ib + + AVX + + Shuffle two pairs of double-precision floating-point values from xmm2 and xmm3/m128 using imm8 to select from each pair, interleaved result is stored in xmm1. + + + VSHUFPD + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F.WIG C6 /r ib + + AVX + + Shuffle four pairs of double-precision floating-point values from ymm2 and ymm3/m256 using imm8 to select from each pair, interleaved result is stored in ymm1. + + + VSHUFPD + xmm1{k1}{z},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.0F.W1 C6 /r ib + + AVX512VL + AVX512F + + Shuffle two pairs of double-precision floating-point values from xmm2 and xmm3/m128/m64bcst using imm8 to select from each pair; store interleaved results in xmm1 subject to writemask k1. + + + VSHUFPD + ymm1{k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.0F.W1 C6 /r ib + + AVX512VL + AVX512F + + Shuffle four pairs of double-precision floating-point values from ymm2 and ymm3/m256/m64bcst using imm8 to select from each pair; store interleaved results in ymm1 subject to writemask k1. + + + VSHUFPD + zmm1{k1}{z},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F.W1 C6 /r ib + + AVX512F + + Shuffle eight pairs of double-precision floating-point values from zmm2 and zmm3/m512/m64bcst using imm8 to select from each pair; store interleaved results in zmm1 subject to writemask k1.
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + SHUFPS--Packed Interleave Shuffle of Quadruplets of Single-Precision Floating-Point Values. + + SHUFPS + xmm1,xmm3/m128,imm8 + 0F C6 /r ib + + SSE + + Select from quadruplet of single-precision floatingpoint values in xmm1 and xmm2/m128 using imm8, interleaved result pairs are stored in xmm1. + + + VSHUFPS + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.0F.WIG C6 /r ib + + AVX + + Select from quadruplet of single-precision floatingpoint values in xmm1 and xmm2/m128 using imm8, interleaved result pairs are stored in xmm1. + + + VSHUFPS + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.0F.WIG C6 /r ib + + AVX + + Select from quadruplet of single-precision floatingpoint values in ymm2 and ymm3/m256 using imm8, interleaved result pairs are stored in ymm1. + + + VSHUFPS + xmm1{k1}{z},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.0F.W0 C6 /r ib + + AVX512VL + AVX512F + + Select from quadruplet of single-precision floatingpoint values in xmm1 and xmm2/m128 using imm8, interleaved result pairs are stored in xmm1, subject to writemask k1. + + + VSHUFPS + ymm1{k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.0F.W0 C6 /r ib + + AVX512VL + AVX512F + + Select from quadruplet of single-precision floatingpoint values in ymm2 and ymm3/m256 using imm8, interleaved result pairs are stored in ymm1, subject to writemask k1. + + + VSHUFPS + zmm1{k1}{z},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.0F.W0 C6 /r ib + + AVX512F + + Select from quadruplet of single-precision floatingpoint values in zmm2 and zmm3/m512 using imm8, interleaved result pairs are stored in zmm1, subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + SQRTPD--Square Root of Double-Precision Floating-Point Values. + + SQRTPD + xmm1,xmm2/m128 + 66 0F 51 /r + + SSE2 + + Computes Square Roots of the packed double-precision floating-point values in xmm2/m128 and stores the result in xmm1. + + + VSQRTPD + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 51 /r + + AVX + + Computes Square Roots of the packed double-precision floating-point values in xmm2/m128 and stores the result in xmm1. + + + VSQRTPD + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 51 /r + + AVX + + Computes Square Roots of the packed double-precision floating-point values in ymm2/m256 and stores the result in ymm1. + + + VSQRTPD + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.0F.W1 51 /r + + AVX512VL + AVX512F + + Computes Square Roots of the packed double-precision floating-point values in xmm2/m128/m64bcst and stores the result in xmm1 subject to writemask k1. + + + VSQRTPD + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.0F.W1 51 /r + + AVX512VL + AVX512F + + Computes Square Roots of the packed double-precision floating-point values in ymm2/m256/m64bcst and stores the result in ymm1 subject to writemask k1. + + + VSQRTPD + zmm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.66.0F.W1 51 /r + + AVX512F + + Computes Square Roots of the packed double-precision floating-point values in zmm2/m512/m64bcst and stores the result in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + SQRTPS--Square Root of Single-Precision Floating-Point Values. 
+ + SQRTPS + xmm1,xmm2/m128 + 0F 51 /r + + SSE + + Computes Square Roots of the packed single-precision floating-point values in xmm2/m128 and stores the result in xmm1. + + + VSQRTPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 51 /r + + AVX + + Computes Square Roots of the packed single-precision floating-point values in xmm2/m128 and stores the result in xmm1. + + + VSQRTPS + ymm1,ymm2/m256 + VEX.256.0F.WIG 51/r + + AVX + + Computes Square Roots of the packed single-precision floating-point values in ymm2/m256 and stores the result in ymm1. + + + VSQRTPS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.0F.W0 51 /r + + AVX512VL + AVX512F + + Computes Square Roots of the packed single-precision floating-point values in xmm2/m128/m32bcst and stores the result in xmm1 subject to writemask k1. + + + VSQRTPS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.0F.W0 51 /r + + AVX512VL + AVX512F + + Computes Square Roots of the packed single-precision floating-point values in ymm2/m256/m32bcst and stores the result in ymm1 subject to writemask k1. + + + VSQRTPS + zmm1 {k1}{z},zmm2/m512/m32bcst{er} + EVEX.512.0F.W0 51/r + + AVX512F + + Computes Square Roots of the packed single-precision floating-point values in zmm2/m512/m32bcst and stores the result in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + SQRTSD--Compute Square Root of Scalar Double-Precision Floating-Point Value. + + SQRTSD + xmm1,xmm2/m64 + F2 0F 51 /r + + SSE2 + + Computes square root of the low double-precision floating-point value in xmm2/m64 and stores the results in xmm1. + + + VSQRTSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 51 /r + + AVX + + Computes square root of the low double-precision floating-point value in xmm3/m64 and stores the results in xmm1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. + + + VSQRTSD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.F2.0F.W1 51 /r + + AVX512F + + Computes square root of the low double-precision floating-point value in xmm3/m64 and stores the results in xmm1 under writemask k1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SQRTSS--Compute Square Root of Scalar Single-Precision Value. + + SQRTSS + xmm1,xmm2/m32 + F3 0F 51 /r + + SSE + + Computes square root of the low single-precision floating-point value in xmm2/m32 and stores the results in xmm1. + + + VSQRTSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 51 /r + + AVX + + Computes square root of the low single-precision floating-point value in xmm3/m32 and stores the results in xmm1. Also, upper single-precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. + + + VSQRTSS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.NDS.LIG.F3.0F.W0 51 /r + + AVX512F + + Computes square root of the low single-precision floating-point value in xmm3/m32 and stores the results in xmm1 under writemask k1. Also, upper single-precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPTERNLOGD/VPTERNLOGQ--Bitwise Ternary Logic.
+ + VPTERNLOGD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.DDS.128.66.0F3A.W0 25 /r ib + + AVX512VL + AVX512F + + Bitwise ternary logic taking xmm1, xmm2 and xmm3/m128/m32bcst as source operands and writing the result to xmm1 under writemask k1 with dword granularity. The immediate value determines the specific binary function being implemented. + + + VPTERNLOGD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.DDS.256.66.0F3A.W0 25 /r ib + + AVX512VL + AVX512F + + Bitwise ternary logic taking ymm1, ymm2 and ymm3/m256/m32bcst as source operands and writing the result to ymm1 under writemask k1 with dword granularity. The immediate value determines the specific binary function being implemented. + + + VPTERNLOGD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.DDS.512.66.0F3A.W0 25 /r ib + + AVX512F + + Bitwise ternary logic taking zmm1, zmm2 and zmm3/m512/m32bcst as source operands and writing the result to zmm1 under writemask k1 with dword granularity. The immediate value determines the specific binary function being implemented. + + + VPTERNLOGQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.DDS.128.66.0F3A.W1 25 /r ib + + AVX512VL + AVX512F + + Bitwise ternary logic taking xmm1, xmm2 and xmm3/m128/m64bcst as source operands and writing the result to xmm1 under writemask k1 with qword granularity. The immediate value determines the specific binary function being implemented. + + + VPTERNLOGQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.DDS.256.66.0F3A.W1 25 /r ib + + AVX512VL + AVX512F + + Bitwise ternary logic taking ymm1, ymm2 and ymm3/m256/m64bcst as source operands and writing the result to ymm1 under writemask k1 with qword granularity. The immediate value determines the specific binary function being implemented. + + + VPTERNLOGQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.DDS.512.66.0F3A.W1 25 /r ib + + AVX512F + + Bitwise ternary logic taking zmm1, zmm2 and zmm3/m512/m64bcst as source operands and writing the result to zmm1 under writemask k1 with qword granularity. The immediate value determines the specific binary function being implemented. + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VPTESTMB/VPTESTMW/VPTESTMD/VPTESTMQ--Logical AND and Set Mask. + + VPTESTMB + k2 {k1},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W0 26 /r + + AVX512VL + AVX512BW + + Bitwise AND of packed byte integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMB + k2 {k1},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W0 26 /r + + AVX512VL + AVX512BW + + Bitwise AND of packed byte integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMB + k2 {k1},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W0 26 /r + + AVX512BW + + Bitwise AND of packed byte integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMW + k2 {k1},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 26 /r + + AVX512VL + AVX512BW + + Bitwise AND of packed word integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMW + k2 {k1},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 26 /r + + AVX512VL + AVX512BW + + Bitwise AND of packed word integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. 
+ + + VPTESTMW + k2 {k1},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 26 /r + + AVX512BW + + Bitwise AND of packed word integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMD + k2 {k1},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 27 /r + + AVX512VL + AVX512F + + Bitwise AND of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMD + k2 {k1},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 27 /r + + AVX512VL + AVX512F + + Bitwise AND of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMD + k2 {k1},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 27 /r + + AVX512F + + Bitwise AND of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMQ + k2 {k1},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 27 /r + + AVX512VL + AVX512F + + Bitwise AND of packed quadword integers in xmm2 and xmm3/m128/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMQ + k2 {k1},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 27 /r + + AVX512VL + AVX512F + + Bitwise AND of packed quadword integers in ymm2 and ymm3/m256/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMQ + k2 {k1},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 27 /r + + AVX512F + + Bitwise AND of packed quadword integers in zmm2 and zmm3/m512/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPSRAVW/VPSRAVD/VPSRAVQ--Variable Bit Shift Right Arithmetic. + + VPSRAVD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 46 /r + + AVX2 + + Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in sign bits. + + + VPSRAVD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 46 /r + + AVX2 + + Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in sign bits. + + + VPSRAVW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 11 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAVW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 11 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in sign bits using writemask k1. + + + VPSRAVW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 11 /r + + AVX512BW + + Shift words in zmm2 right by amount specified in the corresponding element of zmm3/m512 while shifting in sign bits using writemask k1. + + + VPSRAVD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 46 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m32bcst while shifting in sign bits using writemask k1. 
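The VPTESTM* forms above AND two vectors element-wise and write an opmask register rather than a vector. A rough Python model of VPTESTMD (dword elements, with an input writemask k1), meant only to illustrate the described behaviour; masked-off destination bits are cleared, and the function name is invented for this sketch.

def vptestmd(k1, src1, src2):
    """k2[j] = k1[j] AND ((src1[j] & src2[j]) != 0); other bits of k2 are zero."""
    k2 = 0
    for j, (a, b) in enumerate(zip(src1, src2)):
        if (k1 >> j) & 1 and (a & b) != 0:
            k2 |= 1 << j
    return k2

# Four dword elements, no masking: only lanes whose AND is non-zero set a bit.
print(bin(vptestmd(0b1111, [0xFF, 0x0F, 0x00, 0x80], [0x0F, 0xF0, 0xFF, 0x80])))  # 0b1001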
+ + + VPSRAVD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 46 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m32bcst while shifting in sign bits using writemask k1. + + + VPSRAVD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 46 /r + + AVX512F + + Shift doublewords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m32bcst while shifting in sign bits using writemask k1. + + + VPSRAVQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 46 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m64bcst while shifting in sign bits using writemask k1. + + + VPSRAVQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 46 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m64bcst while shifting in sign bits using writemask k1. + + + VPSRAVQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 46 /r + + AVX512F + + Shift quadwords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m64bcst while shifting in sign bits using writemask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PXOR/PXORD/PXORQ--Exclusive Or. + + PXOR + xmm1,xmm2/m128 + 66 0F EF /r + + SSE2 + + Bitwise XOR of xmm2/m128 and xmm1. + + + VPXOR + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EF /r + + AVX + + Bitwise XOR of xmm3/m128 and xmm2. + + + VPXOR + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EF /r + + AVX2 + + Bitwise XOR of ymm3/m256 and ymm2. + + + VPXORD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 EF /r + + AVX512VL + AVX512F + + Bitwise XOR of packed doubleword integers in xmm2 and xmm3/m128 using writemask k1. + + + VPXORD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 EF /r + + AVX512VL + AVX512F + + Bitwise XOR of packed doubleword integers in ymm2 and ymm3/m256 using writemask k1. + + + VPXORD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 EF /r + + AVX512F + + Bitwise XOR of packed doubleword integers in zmm2 and zmm3/m512/m32bcst using writemask k1. + + + VPXORQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 EF /r + + AVX512VL + AVX512F + + Bitwise XOR of packed quadword integers in xmm2 and xmm3/m128 using writemask k1. + + + VPXORQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 EF /r + + AVX512VL + AVX512F + + Bitwise XOR of packed quadword integers in ymm2 and ymm3/m256 using writemask k1. + + + VPXORQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 EF /r + + AVX512F + + Bitwise XOR of packed quadword integers in zmm2 and zmm3/m512/m64bcst using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRANGEPD--Range Restriction Calculation For Packed Pairs of Float64 Values. + + VRANGEPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F3A.W1 50 /r ib + + AVX512VL + AVX512DQ + + Calculate two RANGE operation output value from 2 pairs of double-precision floating-point values in xmm2 and xmm3/m128/m32bcst, store the results to xmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. 
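For the VPSRAV* forms listed above, each element is shifted right arithmetically by the count held in the corresponding element of the second source. A small Python sketch of VPSRAVD, assuming (as the wider spec describes, though it is not spelled out in these rows) that a count above 31 fills the lane with the sign bit; the helper is illustrative only.

def vpsravd(src, counts):
    """Per-lane arithmetic right shift of signed 32-bit elements."""
    out = []
    for x, n in zip(src, counts):
        x_signed = x - (1 << 32) if x & 0x80000000 else x   # reinterpret as two's complement
        n = min(n, 31)   # a count above 31 gives the same result as shifting by 31 (all sign bits)
        out.append((x_signed >> n) & 0xFFFFFFFF)
    return out

print([hex(v) for v in vpsravd([0x80000000, 0x7FFFFFFF, 0xFFFFFFF0], [4, 8, 40])])
# ['0xf8000000', '0x7fffff', '0xffffffff']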
+ + + VRANGEPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 50 /r ib + + AVX512VL + AVX512DQ + + Calculate four RANGE operation output value from 4pairs of double-precision floating-point values in ymm2 and ymm3/m256/m32bcst, store the results to ymm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + VRANGEPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae},imm8 + EVEX.NDS.512.66.0F3A.W1 50 /r ib + + AVX512DQ + + Calculate eight RANGE operation output value from 8 pairs of double-precision floating-point values in zmm2 and zmm3/m512/m32bcst, store the results to zmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VRANGEPS--Range Restriction Calculation For Packed Pairs of Float32 Values. + + VRANGEPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.66.0F3A.W0 50 /r ib + + AVX512VL + AVX512DQ + + Calculate four RANGE operation output value from 4 pairs of single-precision floating-point values in xmm2 and xmm3/m128/m32bcst, store the results to xmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + VRANGEPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 50 /r ib + + AVX512VL + AVX512DQ + + Calculate eight RANGE operation output value from 8 pairs of single-precision floating-point values in ymm2 and ymm3/m256/m32bcst, store the results to ymm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + VRANGEPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae},imm8 + EVEX.NDS.512.66.0F3A.W0 50 /r ib + + AVX512DQ + + Calculate 16 RANGE operation output value from 16 pairs of single-precision floating-point values in zmm2 and zmm3/m512/m32bcst, store the results to zmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VRANGESD--Range Restriction Calculation From a pair of Scalar Float64 Values. + + VRANGESD + xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W1 51 /r + + AVX512DQ + + Calculate a RANGE operation output value from 2 doubleprecision floating-point values in xmm2 and xmm3/m64, store the output to xmm1 under writemask. Imm8 specifies the comparison and sign of the range operation. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VRANGESS--Range Restriction Calculation From a Pair of Scalar Float32 Values. + + VRANGESS + xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8 + EVEX.ND.LIG.66.0F3A.W0 51 /r + + AVX512DQ + + Calculate a RANGE operation output value from 2 singleprecision floating-point values in xmm2 and xmm3/m32, store the output to xmm1 under writemask. Imm8 specifies the comparison and sign of the range operation. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRCP14PD--Compute Approximate Reciprocals of Packed Float64 Values. + + VRCP14PD + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 4C /r + + AVX512VL + AVX512F + + Computes the approximate reciprocals of the packed doubleprecision floating-point values in xmm2/m128/m64bcst and stores the results in xmm1. Under writemask. + + + VRCP14PD + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 4C /r + + AVX512VL + AVX512F + + Computes the approximate reciprocals of the packed doubleprecision floating-point values in ymm2/m256/m64bcst and stores the results in ymm1. Under writemask. 
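Entries throughout this table use the "{k1}{z}" notation. A quick Python sketch of how an EVEX writemask is applied per element: merging keeps the old destination value in masked-off lanes, while zeroing-masking ({z}) clears them. This is a generic illustration, not tied to any particular opcode, and the helper name is made up for the example.

def apply_writemask(dst_old, result, k1, zeroing):
    """Keep result[j] where mask bit j is set; otherwise preserve the old
    destination element (merging) or write 0 (zeroing-masking)."""
    out = []
    for j, (old, new) in enumerate(zip(dst_old, result)):
        if (k1 >> j) & 1:
            out.append(new)
        else:
            out.append(0 if zeroing else old)
    return out

old = [1.0, 2.0, 3.0, 4.0]
res = [10.0, 20.0, 30.0, 40.0]
print(apply_writemask(old, res, 0b0101, zeroing=False))  # [10.0, 2.0, 30.0, 4.0]
print(apply_writemask(old, res, 0b0101, zeroing=True))   # [10.0, 0, 30.0, 0]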
+ + + VRCP14PD + zmm1 {k1}{z},zmm2/m512/m64bcst + EVEX.512.66.0F38.W1 4C /r + + AVX512F + + Computes the approximate reciprocals of the packed doubleprecision floating-point values in zmm2/m512/m64bcst and stores the results in zmm1. Under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRCP14SD--Compute Approximate Reciprocal of Scalar Float64 Value. + + T1S + VRCP14SD xmm1 {k1}{z},xmm2,xmm3/m64 + EVEX.NDS.LIG.66.0F38.W1 4D /r + + AVX512F + + Computes the approximate reciprocal of the scalar doubleprecision floating-point value in xmm3/m64 and stores the result in xmm1 using writemask k1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRCP14PS--Compute Approximate Reciprocals of Packed Float32 Values. + + VRCP14PS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 4C /r + + AVX512VL + AVX512F + + Computes the approximate reciprocals of the packed singleprecision floating-point values in xmm2/m128/m32bcst and stores the results in xmm1. Under writemask. + + + VRCP14PS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 4C /r + + AVX512VL + AVX512F + + Computes the approximate reciprocals of the packed singleprecision floating-point values in ymm2/m256/m32bcst and stores the results in ymm1. Under writemask. + + + VRCP14PS + zmm1 {k1}{z},zmm2/m512/m32bcst + EVEX.512.66.0F38.W0 4C /r + + AVX512F + + Computes the approximate reciprocals of the packed singleprecision floating-point values in zmm2/m512/m32bcst and stores the results in zmm1. Under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRCP14SS--Compute Approximate Reciprocal of Scalar Float32 Value. + + VRCP14SS + xmm1 {k1}{z},xmm2,xmm3/m32 + EVEX.NDS.LIG.66.0F38.W0 4D /r + + AVX512F + + Computes the approximate reciprocal of the scalar singleprecision floating-point value in xmm3/m32 and stores the results in xmm1 using writemask k1. Also, upper doubleprecision floating-point value (bits[127:32]) from xmm2 is copied to xmm1[127:32]. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VREDUCEPD--Perform Reduction Transformation on Packed Float64 Values. + + VREDUCEPD + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.128.66.0F3A.W1 56 /r ib + + AVX512VL + AVX512DQ + + Perform reduction transformation on packed double-precision floating point values in xmm2/m128/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register under writemask k1. + + + VREDUCEPD + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 56 /r ib + + AVX512VL + AVX512DQ + + Perform reduction transformation on packed double-precision floating point values in ymm2/m256/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register under writemask k1. + + + VREDUCEPD + zmm1 {k1}{z},zmm2/m512/m64bcst{sae},imm8 + EVEX.512.66.0F3A.W1 56 /r ib + + AVX512DQ + + Perform reduction transformation on double-precision floating point values in zmm2/m512/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register under writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VREDUCESD--Perform a Reduction Transformation on a Scalar Float64 Value. 
+ + VREDUCESD + xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W1 57 /r + + AVX512DQ + + Perform a reduction transformation on a scalar double-precision floating point value in xmm3/m64 by subtracting a number of fraction bits specified by the imm8 field. Also, upper double precision floating-point value (bits[127:64]) from xmm2 are copied to xmm1[127:64]. Stores the result in xmm1 register. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VREDUCEPS--Perform Reduction Transformation on Packed Float32 Values. + + VREDUCEPS + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F3A.W0 56 /r ib + + AVX512VL + AVX512DQ + + Perform reduction transformation on packed single-precision floating point values in xmm2/m128/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register under writemask k1. + + + VREDUCEPS + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F3A.W0 56 /r ib + + AVX512VL + AVX512DQ + + Perform reduction transformation on packed single-precision floating point values in ymm2/m256/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register under writemask k1. + + + VREDUCEPS + zmm1 {k1}{z},zmm2/m512/m32bcst{sae},imm8 + EVEX.512.66.0F3A.W0 56 /r ib + + AVX512DQ + + Perform reduction transformation on packed single-precision floating point values in zmm2/m512/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register under writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VREDUCESS--Perform a Reduction Transformation on a Scalar Float32 Value. + + VREDUCESS + xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W0 57 /r /ib + + AVX512DQ + + Perform a reduction transformation on a scalar single-precision floating point value in xmm3/m32 by subtracting a number of fraction bits specified by the imm8 field. Also, upper single precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. Stores the result in xmm1 register. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRNDSCALEPD--Round Packed Float64 Values To Include A Given Number Of Fraction Bits. + + VRNDSCALEPD + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.128.66.0F3A.W1 09 /r ib + + AVX512VL + AVX512F + + Rounds packed double-precision floating point values in xmm2/m128/m64bcst to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register. Under writemask. + + + VRNDSCALEPD + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 09 /r ib + + AVX512VL + AVX512F + + Rounds packed double-precision floating point values in ymm2/m256/m64bcst to a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register. Under writemask. + + + VRNDSCALEPD + zmm1 {k1}{z},zmm2/m512/m64bcst{sae},imm8 + EVEX.512.66.0F3A.W1 09 /r ib + + AVX512F + + Rounds packed double-precision floating-point values in zmm2/m512/m64bcst to a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VRNDSCALESD--Round Scalar Float64 Value To Include A Given Number Of Fraction Bits. + + VRNDSCALESD + xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W1 0B /r ib + + AVX512F + + Rounds scalar double-precision floating-point value in xmm3/m64 to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register. 
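The VREDUCE and VRNDSCALE entries above both refer to "a number of fraction bits specified by the imm8 field". Assuming, per the wider spec rather than this table, that the kept-fraction count M comes from the upper imm8 bits, VRNDSCALE computes 2^-M * round(x * 2^M) and VREDUCE returns the part that rounding removed. A sketch using round-to-nearest-even only (the real instructions select the rounding mode from imm8/MXCSR); both function names are illustrative.

def rndscale(x, m):
    """Keep at most m fraction bits: 2**-m * round_to_nearest_even(x * 2**m)."""
    return round(x * (2.0 ** m)) * (2.0 ** -m)

def reduce_fp(x, m):
    """Reduction transformation: the fraction that rndscale() discards."""
    return x - rndscale(x, m)

x = 3.921875              # 3 + 59/64
print(rndscale(x, 3))     # 3.875 (nearest multiple of 1/8)
print(reduce_fp(x, 3))    # 0.046875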
+ + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VRNDSCALEPS--Round Packed Float32 Values To Include A Given Number Of Fraction Bits. + + VRNDSCALEPS + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F3A.W0 08 /r ib + + AVX512VL + AVX512F + + Rounds packed single-precision floating point values in xmm2/m128/m32bcst to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register. Under writemask. + + + VRNDSCALEPS + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F3A.W0 08 /r ib + + AVX512VL + AVX512F + + Rounds packed single-precision floating point values in ymm2/m256/m32bcst to a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register. Under writemask. + + + VRNDSCALEPS + zmm1 {k1}{z},zmm2/m512/m32bcst{sae},imm8 + EVEX.512.66.0F3A.W0 08 /r ib + + AVX512F + + Rounds packed single-precision floating-point values in zmm2/m512/m32bcst to a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register using writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VRNDSCALESS--Round Scalar Float32 Value To Include A Given Number Of Fraction Bits. + + VRNDSCALESS + xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W0 0A /r ib + + AVX512F + + Rounds scalar single-precision floating-point value in xmm3/m32 to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register under writemask. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRSQRT14PD--Compute Approximate Reciprocals of Square Roots of Packed Float64 Values. + + VRSQRT14PD + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 4E /r + + AVX512VL + AVX512F + + Computes the approximate reciprocal square roots of the packed double-precision floating-point values in xmm2/m128/m64bcst and stores the results in xmm1. Under writemask. + + + VRSQRT14PD + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 4E /r + + AVX512VL + AVX512F + + Computes the approximate reciprocal square roots of the packed double-precision floating-point values in ymm2/m256/m64bcst and stores the results in ymm1. Under writemask. + + + VRSQRT14PD + zmm1 {k1}{z},zmm2/m512/m64bcst + EVEX.512.66.0F38.W1 4E /r + + AVX512F + + Computes the approximate reciprocal square roots of the packed double-precision floating-point values in zmm2/m512/m64bcst and stores the results in zmm1 under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRSQRT14SD--Compute Approximate Reciprocal of Square Root of Scalar Float64 Value. + + VRSQRT14SD + xmm1 {k1}{z},xmm2,xmm3/m64 + EVEX.NDS.LIG.66.0F38.W1 4F /r + + AVX512F + + Computes the approximate reciprocal square root of the scalar double-precision floating-point value in xmm3/m64 and stores the result in the low quadword element of xmm1 using writemask k1. Bits[127:64] of xmm2 is copied to xmm1[127:64]. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRSQRT14PS--Compute Approximate Reciprocals of Square Roots of Packed Float32 Values. + + VRSQRT14PS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 4E /r + + AVX512VL + AVX512F + + Computes the approximate reciprocal square roots of the packed single-precision floating-point values in xmm2/m128/m32bcst and stores the results in xmm1. Under writemask. 
+ + + VRSQRT14PS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 4E /r + + AVX512VL + AVX512F + + Computes the approximate reciprocal square roots of the packed single-precision floating-point values in ymm2/m256/m32bcst and stores the results in ymm1. Under writemask. + + + VRSQRT14PS + zmm1 {k1}{z},zmm2/m512/m32bcst + EVEX.512.66.0F38.W0 4E /r + + AVX512F + + Computes the approximate reciprocal square roots of the packed single-precision floating-point values in zmm2/m512/m32bcst and stores the results in zmm1. Under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRSQRT14SS--Compute Approximate Reciprocal of Square Root of Scalar Float32 Value. + + VRSQRT14SS + xmm1 {k1}{z},xmm2,xmm3/m32 + EVEX.NDS.LIG.66.0F38.W0 4F /r + + AVX512F + + Computes the approximate reciprocal square root of the scalar single-precision floating-point value in xmm3/m32 and stores the result in the low doubleword element of xmm1 using writemask k1. Bits[127:32] of xmm2 is copied to xmm1[127:32]. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VSCALEFPD--Scale Packed Float64 Values With Float64 Values. + + VSCALEFPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 2C /r + + AVX512VL + AVX512F + + Scale the packed double-precision floating-point values in xmm2 using values from xmm3/m128/m64bcst. Under writemask k1. + + + VSCALEFPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 2C /r + + AVX512VL + AVX512F + + Scale the packed double-precision floating-point values in ymm2 using values from ymm3/m256/m64bcst. Under writemask k1. + + + VSCALEFPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 2C /r + + AVX512F + + Scale the packed double-precision floating-point values in zmm2 using values from zmm3/m512/m64bcst. Under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VSCALEFSD--Scale Scalar Float64 Values With Float64 Values. + + VSCALEFSD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.66.0F38.W1 2D /r + + AVX512F + + Scale the scalar double-precision floating-point values in xmm2 using the value from xmm3/m64. Under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VSCALEFPS--Scale Packed Float32 Values With Float32 Values. + + VSCALEFPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 2C /r + + AVX512VL + AVX512F + + Scale the packed single-precision floating-point values in xmm2 using values from xmm3/m128/m32bcst. Under writemask k1. + + + VSCALEFPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 2C /r + + AVX512VL + AVX512F + + Scale the packed single-precision values in ymm2 using floating point values from ymm3/m256/m32bcst. Under writemask k1. + + + VSCALEFPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 2C /r + + AVX512F + + Scale the packed single-precision floating-point values in zmm2 using floating-point values from zmm3/m512/m32bcst. Under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VSCALEFSS--Scale Scalar Float32 Value With Float32 Value. + + VSCALEFSS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.NDS.LIG.66.0F38.W0 2D /r + + AVX512F + + Scale the scalar single-precision floating-point value in xmm2 using floating-point value from xmm3/m32. Under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VSCATTERDPS/VSCATTERDPD/VSCATTERQPS/VSCATTERQPD--Scatter Packed Single, Packed Double with Signed Dword and Qword Indices. 
+ + VSCATTERDPS + vm32x {k1},xmm1 + EVEX.128.66.0F38.W0 A2 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERDPS + vm32y {k1},ymm1 + EVEX.256.66.0F38.W0 A2 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERDPS + vm32z {k1},zmm1 + EVEX.512.66.0F38.W0 A2 /vsib + + AVX512F + + Using signed dword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERDPD + vm32x {k1},xmm1 + EVEX.128.66.0F38.W1 A2 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter double-precision floating-point values to memory using writemask k1. + + + VSCATTERDPD + vm32x {k1},ymm1 + EVEX.256.66.0F38.W1 A2 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter double-precision floating-point values to memory using writemask k1. + + + VSCATTERDPD + vm32y {k1},zmm1 + EVEX.512.66.0F38.W1 A2 /vsib + + AVX512F + + Using signed dword indices, scatter double-precision floating-point values to memory using writemask k1. + + + VSCATTERQPS + vm64x {k1},xmm1 + EVEX.128.66.0F38.W0 A3 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERQPS + vm64y {k1},xmm1 + EVEX.256.66.0F38.W0 A3 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERQPS + vm64z {k1},ymm1 + EVEX.512.66.0F38.W0 A3 /vsib + + AVX512F + + Using signed qword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERQPD + vm64x {k1},xmm1 + EVEX.128.66.0F38.W1 A3 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter double-precision floating-point values to memory using writemask k1. + + + VSCATTERQPD + vm64y {k1},ymm1 + EVEX.256.66.0F38.W1 A3 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter double-precision floating-point values to memory using writemask k1. + + + VSCATTERQPD + vm64z {k1},zmm1 + EVEX.512.66.0F38.W1 A3 /vsib + + AVX512F + + Using signed qword indices, scatter double-precision floating-point values to memory using writemask k1. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + ModRM:reg(r) + NA + NA + + + + SUBPD--Subtract Packed Double-Precision Floating-Point Values. + + SUBPD + xmm1,xmm2/m128 + 66 0F 5C /r + + SSE2 + + Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result in xmm1. + + + VSUBPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5C /r + + AVX + + Subtract packed double-precision floating-point values in xmm3/mem from xmm2 and store result in xmm1. + + + VSUBPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5C /r + + AVX + + Subtract packed double-precision floating-point values in ymm3/mem from ymm2 and store result in ymm1. + + + VSUBPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 5C /r + + AVX512VL + AVX512F + + Subtract packed double-precision floating-point values from xmm3/m128/m64bcst to xmm2 and store result in xmm1 with writemask k1. + + + VSUBPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 5C /r + + AVX512VL + AVX512F + + Subtract packed double-precision floating-point values from ymm3/m256/m64bcst to ymm2 and store result in ymm1 with writemask k1. 
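The VSCATTER*PS/PD forms above store elements through a vector of indices (VSIB addressing). A simplified Python model of VSCATTERDPS, assuming byte addressing into a dict used as memory; the progressive clearing of the mask is sketched because it is what lets the instruction resume after a fault. Names and the dict-as-memory shortcut are purely illustrative.

def vscatterdps(mem, base, scale, indices, src, k1):
    """Scatter src[j] to byte address base + indices[j]*scale for every set bit of k1."""
    for j, (idx, val) in enumerate(zip(indices, src)):
        if (k1 >> j) & 1:
            mem[base + idx * scale] = val
            k1 &= ~(1 << j)          # mask bit cleared once this element's store completes
    return k1

mem = {}
print(vscatterdps(mem, 0x1000, 4, [0, 2, 2, 7], [1.0, 2.0, 3.0, 4.0], 0b1011))  # 0
print(mem)  # {4096: 1.0, 4104: 2.0, 4124: 4.0}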
+ + + VSUBPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F.W1 5C /r + + AVX512F + + Subtract packed double-precision floating-point values from zmm3/m512/m64bcst to zmm2 and store result in zmm1 with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SUBPS--Subtract Packed Single-Precision Floating-Point Values. + + SUBPS + xmm1,xmm2/m128 + 0F 5C /r + + SSE + + Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result in xmm1. + + + VSUBPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5C /r + + AVX + + Subtract packed single-precision floating-point values in xmm3/mem from xmm2 and stores result in xmm1. + + + VSUBPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5C /r + + AVX + + Subtract packed single-precision floating-point values in ymm3/mem from ymm2 and stores result in ymm1. + + + VSUBPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 5C /r + + AVX512VL + AVX512F + + Subtract packed single-precision floating-point values from xmm3/m128/m32bcst to xmm2 and stores result in xmm1 with writemask k1. + + + VSUBPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 5C /r + + AVX512VL + AVX512F + + Subtract packed single-precision floating-point values from ymm3/m256/m32bcst to ymm2 and stores result in ymm1 with writemask k1. + + + VSUBPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.0F.W0 5C /r + + AVX512F + + Subtract packed single-precision floating-point values in zmm3/m512/m32bcst from zmm2 and stores result in zmm1 with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SUBSD--Subtract Scalar Double-Precision Floating-Point Value. + + SUBSD + xmm1,xmm2/m64 + F2 0F 5C /r + + SSE2 + + Subtract the low double-precision floating-point value in xmm2/m64 from xmm1 and store the result in xmm1. + + + VSUBSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 5C /r + + AVX + + Subtract the low double-precision floating-point value in xmm3/m64 from xmm2 and store the result in xmm1. + + + VSUBSD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.F2.0F.W1 5C /r + + AVX512F + + Subtract the low double-precision floating-point value in xmm3/m64 from xmm2 and store the result in xmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SUBSS--Subtract Scalar Single-Precision Floating-Point Value. + + SUBSS + xmm1,xmm2/m32 + F3 0F 5C /r + + SSE + + Subtract the low single-precision floating-point value in xmm2/m32 from xmm1 and store the result in xmm1. + + + VSUBSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 5C /r + + AVX + + Subtract the low single-precision floating-point value in xmm3/m32 from xmm2 and store the result in xmm1. + + + VSUBSS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.NDS.LIG.F3.0F.W0 5C /r + + AVX512F + + Subtract the low single-precision floating-point value in xmm3/m32 from xmm2 and store the result in xmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UCOMISD--Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS. 
+ + UCOMISD + xmm1,xmm2/m64 + 66 0F 2E /r + + SSE2 + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + VUCOMISD + xmm1,xmm2/m64 + VEX.128.66.0F.WIG 2E /r + + AVX + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + VUCOMISD + xmm1,xmm2/m64{sae} + EVEX.LIG.66.0F.W1 2E /r + + AVX512F + + Compare low double-precision floating-point values in xmm1 and xmm2/m64 and set the EFLAGS flags accordingly. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + UCOMISS--Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS. + + UCOMISS + xmm1,xmm2/m32 + 0F 2E /r + + SSE + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + VUCOMISS + xmm1,xmm2/m32 + VEX.128.0F.WIG 2E /r + + AVX + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + VUCOMISS + xmm1,xmm2/m32{sae} + EVEX.LIG.0F.W0 2E /r + + AVX512F + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + UNPCKHPD--Unpack and Interleave High Packed Double-Precision Floating-Point Values. + + UNPCKHPD + xmm1,xmm2/m128 + 66 0F 15 /r + + SSE2 + + Unpacks and Interleaves double-precision floating-point values from high quadwords of xmm1 and xmm2/m128. + + + VUNPCKHPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves double-precision floating-point values from high quadwords of xmm2 and xmm3/m128. + + + VUNPCKHPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves double-precision floating-point values from high quadwords of ymm2 and ymm3/m256. + + + VUNPCKHPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 15 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves double precision floating-point values from high quadwords of xmm2 and xmm3/m128/m64bcst subject to writemask k1. + + + VUNPCKHPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 15 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves double precision floating-point values from high quadwords of ymm2 and ymm3/m256/m64bcst subject to writemask k1. + + + VUNPCKHPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 15 /r + + AVX512F + + Unpacks and Interleaves double-precision floating-point values from high quadwords of zmm2 and zmm3/m512/m64bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UNPCKHPS--Unpack and Interleave High Packed Single-Precision Floating-Point Values. + + UNPCKHPS + xmm1,xmm2/m128 + 0F 15 /r + + SSE + + Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm1 and xmm2/m128. + + + VUNPCKHPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm2 and xmm3/m128. + + + VUNPCKHPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves single-precision floating-point values from high quadwords of ymm2 and ymm3/m256. 
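The (V)UCOMISD/(V)UCOMISS entries only say the EFLAGS are set "accordingly"; the usual mapping of the unordered compare onto ZF/PF/CF (with OF, SF and AF cleared) can be sketched as below. The helper name is invented for the example.

import math

def ucomisd_flags(a, b):
    """EFLAGS outcome of an unordered scalar compare: returns (ZF, PF, CF)."""
    if math.isnan(a) or math.isnan(b):
        return (1, 1, 1)        # unordered
    if a > b:
        return (0, 0, 0)
    if a < b:
        return (0, 0, 1)
    return (1, 0, 0)            # equal

print(ucomisd_flags(1.0, float("nan")))  # (1, 1, 1)
print(ucomisd_flags(2.0, 1.0))           # (0, 0, 0)
print(ucomisd_flags(1.0, 1.0))           # (1, 0, 0)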
+ + + VUNPCKHPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 15 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm2 and xmm3/m128/m32bcst and write result to xmm1 subject to writemask k1. + + + VUNPCKHPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 15 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves single-precision floating-point values from high quadwords of ymm2 and ymm3/m256/m32bcst and write result to ymm1 subject to writemask k1. + + + VUNPCKHPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 15 /r + + AVX512F + + Unpacks and Interleaves single-precision floating-point values from high quadwords of zmm2 and zmm3/m512/m32bcst and write result to zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UNPCKLPD--Unpack and Interleave Low Packed Double-Precision Floating-Point Values. + + UNPCKLPD + xmm1,xmm2/m128 + 66 0F 14 /r + + SSE2 + + Unpacks and Interleaves double-precision floating-point values from low quadwords of xmm1 and xmm2/m128. + + + VUNPCKLPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 14 /r + + AVX + + Unpacks and Interleaves double-precision floating-point values from low quadwords of xmm2 and xmm3/m128. + + + VUNPCKLPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 14 /r + + AVX + + Unpacks and Interleaves double-precision floating-point values from low quadwords of ymm2 and ymm3/m256. + + + VUNPCKLPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 14 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves double precision floating-point values from low quadwords of xmm2 and xmm3/m128/m64bcst subject to write mask k1. + + + VUNPCKLPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 14 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves double precision floating-point values from low quadwords of ymm2 and ymm3/m256/m64bcst subject to write mask k1. + + + VUNPCKLPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 14 /r + + AVX512F + + Unpacks and Interleaves double-precision floating-point values from low quadwords of zmm2 and zmm3/m512/m64bcst subject to write mask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UNPCKLPS--Unpack and Interleave Low Packed Single-Precision Floating-Point Values. + + UNPCKLPS + xmm1,xmm2/m128 + 0F 14 /r + + SSE + + Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm1 and xmm2/m128. + + + VUNPCKLPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 14 /r + + AVX + + Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm2 and xmm3/m128. + + + ymm1,ymm2,ymm3/m256 + void + VEX.NDS.256.0F.WIG 14 /r VUNPCKLPS + + AVX + + Unpacks and Interleaves single-precision floating-point values from low quadwords of ymm2 and ymm3/m256. + + + VUNPCKLPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 14 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm2 and xmm3/mem and write result to xmm1 subject to write mask k1. 
+ + + VUNPCKLPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 14 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves single-precision floating-point values from low quadwords of ymm2 and ymm3/mem and write result to ymm1 subject to write mask k1. + + + VUNPCKLPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 14 /r + + AVX512F + + Unpacks and Interleaves single-precision floating-point values from low quadwords of zmm2 and zmm3/m512/m32bcst and write result to zmm1 subject to write mask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + XORPD--Bitwise Logical XOR of Packed Double Precision Floating-Point Values. + + XORPD + xmm1,xmm2/m128 + 66 0F 57/r + + SSE2 + + Return the bitwise logical XOR of packed doubleprecision floating-point values in xmm1 and xmm2/mem. + + + VXORPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed doubleprecision floating-point values in xmm2 and xmm3/mem. + + + VXORPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed doubleprecision floating-point values in ymm2 and ymm3/mem. + + + VXORPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 57 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical XOR of packed doubleprecision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1. + + + VXORPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 57 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical XOR of packed doubleprecision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1. + + + VXORPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 57 /r + + AVX512DQ + + Return the bitwise logical XOR of packed doubleprecision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + XORPS--Bitwise Logical XOR of Packed Single Precision Floating-Point Values. + + XORPS + xmm1,xmm2/m128 + 0F 57 /r + + SSE + + Return the bitwise logical XOR of packed singleprecision floating-point values in xmm1 and xmm2/mem. + + + VXORPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed singleprecision floating-point values in xmm2 and xmm3/mem. + + + VXORPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed singleprecision floating-point values in ymm2 and ymm3/mem. + + + VXORPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 57 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical XOR of packed singleprecision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1. + + + VXORPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 57 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical XOR of packed singleprecision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1. + + + VXORPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 57 /r + + AVX512DQ + + Return the bitwise logical XOR of packed singleprecision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1. 
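The UNPCKLPD/UNPCKLPS/UNPCKHPD/UNPCKHPS family above interleaves halves of the two sources; within one 128-bit lane the legacy forms behave like this short sketch (singles as 4-element lists, doubles as 2-element lists). The VEX/EVEX forms take two explicit sources instead of reusing the destination; function names here are illustrative.

def unpcklps(a, b):
    """Interleave the two low singles of each source."""
    return [a[0], b[0], a[1], b[1]]

def unpckhps(a, b):
    """Interleave the two high singles of each source."""
    return [a[2], b[2], a[3], b[3]]

def unpckhpd(a, b):
    """Take the high double of each source."""
    return [a[1], b[1]]

print(unpcklps([0, 1, 2, 3], [10, 11, 12, 13]))  # [0, 10, 1, 11]
print(unpckhpd([0.5, 1.5], [2.5, 3.5]))          # [1.5, 3.5]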
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + KADDW/KADDB/KADDQ/KADDD--ADD Two Masks. + + KADDW + k1,k2,k3 + VEX.L1.0F.W0 4A /r + + AVX512DQ + + Add 16 bits masks in k2 and k3 and place result in k1. + + + KADDB + k1,k2,k3 + VEX.L1.66.0F.W0 4A /r + + AVX512DQ + + Add 8 bits masks in k2 and k3 and place result in k1. + + + KADDQ + k1,k2,k3 + VEX.L1.0F.W1 4A /r + + AVX512BW + + Add 64 bits masks in k2 and k3 and place result in k1. + + + KADDD + k1,k2,k3 + VEX.L1.66.0F.W1 4A /r + + AVX512BW + + Add 32 bits masks in k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KANDW/KANDB/KANDQ/KANDD--Bitwise Logical AND Masks. + + KANDW + k1,k2,k3 + VEX.NDS.L1.0F.W0 41 /r + + AVX512F + + Bitwise AND 16 bits masks k2 and k3 and place result in k1. + + + KANDB + k1,k2,k3 + VEX.L1.66.0F.W0 41 /r + + AVX512DQ + + Bitwise AND 8 bits masks k2 and k3 and place result in k1. + + + KANDQ + k1,k2,k3 + VEX.L1.0F.W1 41 /r + + AVX512BW + + Bitwise AND 64 bits masks k2 and k3 and place result in k1. + + + KANDD + k1,k2,k3 + VEX.L1.66.0F.W1 41 /r + + AVX512BW + + Bitwise AND 32 bits masks k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KANDNW/KANDNB/KANDNQ/KANDND--Bitwise Logical AND NOT Masks. + + KANDNW + k1,k2,k3 + VEX.NDS.L1.0F.W0 42 /r + + AVX512F + + Bitwise AND NOT 16 bits masks k2 and k3 and place result in k1. + + + KANDNB + k1,k2,k3 + VEX.L1.66.0F.W0 42 /r + + AVX512DQ + + Bitwise AND NOT 8 bits masks k1 and k2 and place result in k1. + + + KANDNQ + k1,k2,k3 + VEX.L1.0F.W1 42 /r + + AVX512BW + + Bitwise AND NOT 64 bits masks k2 and k3 and place result in k1. + + + KANDND + k1,k2,k3 + VEX.L1.66.0F.W1 42 /r + + AVX512BW + + Bitwise AND NOT 32 bits masks k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KMOVW/KMOVB/KMOVQ/KMOVD--Move from and to Mask Registers. + + KMOVW + k1,k2/m16 + VEX.L0.0F.W0 90 /r + + AVX512F + + Move 16 bits mask from k2/m16 and store the result in k1. + + + KMOVB + k1,k2/m8 + VEX.L0.66.0F.W0 90 /r + + AVX512DQ + + Move 8 bits mask from k2/m8 and store the result in k1. + + + KMOVQ + k1,k2/m64 + VEX.L0.0F.W1 90 /r + + AVX512BW + + Move 64 bits mask from k2/m64 and store the result in k1. + + + KMOVD + k1,k2/m32 + VEX.L0.66.0F.W1 90 /r + + AVX512BW + + Move 32 bits mask from k2/m32 and store the result in k1. + + + KMOVW + m16,k1 + VEX.L0.0F.W0 91 /r + + AVX512F + + Move 16 bits mask from k1 and store the result in m16. + + + KMOVB + m8,k1 + VEX.L0.66.0F.W0 91 /r + + AVX512DQ + + Move 8 bits mask from k1 and store the result in m8. + + + KMOVQ + m64,k1 + VEX.L0.0F.W1 91 /r + + AVX512BW + + Move 64 bits mask from k1 and store the result in m64. + + + KMOVD + m32,k1 + VEX.L0.66.0F.W1 91 /r + + AVX512BW + + Move 32 bits mask from k1 and store the result in m32. + + + KMOVW + k1,r32 + VEX.L0.0F.W0 92 /r + + AVX512F + + Move 16 bits mask from r32 to k1. + + + KMOVB + k1,r32 + VEX.L0.66.0F.W0 92 /r + + AVX512DQ + + Move 8 bits mask from r32 to k1. + + + KMOVQ + k1,r64 + VEX.L0.F2.0F.W1 92 /r + + AVX512BW + + Move 64 bits mask from r64 to k1. + + + KMOVD + k1,r32 + VEX.L0.F2.0F.W0 92 /r + + AVX512BW + + Move 32 bits mask from r32 to k1. + + + KMOVW + r32,k1 + VEX.L0.0F.W0 93 /r + + AVX512F + + Move 16 bits mask from k1 to r32. 
+ + + KMOVB + r32,k1 + VEX.L0.66.0F.W0 93 /r + + AVX512DQ + + Move 8 bits mask from k1 to r32. + + + KMOVQ + r64,k1 + VEX.L0.F2.0F.W1 93 /r + + AVX512BW + + Move 64 bits mask from k1 to r64. + + + KMOVD + r32,k1 + VEX.L0.F2.0F.W0 93 /r + + AVX512BW + + Move 32 bits mask from k1 to r32. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w, ModRM:[7:6] must not be 11b) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + NA + + + + KUNPCKBW/KUNPCKWD/KUNPCKDQ--Unpack for Mask Registers. + + KUNPCKBW + k1,k2,k3 + VEX.NDS.L1.66.0F.W0 4B /r + + AVX512F + + Unpack and interleave 8 bits masks in k2 and k3 and write word result in k1. + + + KUNPCKWD + k1,k2,k3 + VEX.NDS.L1.0F.W0 4B /r + + AVX512BW + + Unpack and interleave 16 bits in k2 and k3 and write doubleword result in k1. + + + KUNPCKDQ + k1,k2,k3 + VEX.NDS.L1.0F.W1 4B /r + + AVX512BW + + Unpack and interleave 32 bits masks in k2 and k3 and write quadword result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KNOTW/KNOTB/KNOTQ/KNOTD--NOT Mask Register. + + KNOTW + k1,k2 + VEX.L0.0F.W0 44 /r + + AVX512F + + Bitwise NOT of 16 bits mask k2. + + + KNOTB + k1,k2 + VEX.L0.66.0F.W0 44 /r + + AVX512DQ + + Bitwise NOT of 8 bits mask k2. + + + KNOTQ + k1,k2 + VEX.L0.0F.W1 44 /r + + AVX512BW + + Bitwise NOT of 64 bits mask k2. + + + KNOTD + k1,k2 + VEX.L0.66.0F.W1 44 /r + + AVX512BW + + Bitwise NOT of 32 bits mask k2. + + + ModRM:reg(w) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + NA + + + + KORW/KORB/KORQ/KORD--Bitwise Logical OR Masks. + + KORW + k1,k2,k3 + VEX.NDS.L1.0F.W0 45 /r + + AVX512F + + Bitwise OR 16 bits masks k2 and k3 and place result in k1. + + + KORB + k1,k2,k3 + VEX.L1.66.0F.W0 45 /r + + AVX512DQ + + Bitwise OR 8 bits masks k2 and k3 and place result in k1. + + + KORQ + k1,k2,k3 + VEX.L1.0F.W1 45 /r + + AVX512BW + + Bitwise OR 64 bits masks k2 and k3 and place result in k1. + + + KORD + k1,k2,k3 + VEX.L1.66.0F.W1 45 /r + + AVX512BW + + Bitwise OR 32 bits masks k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KORTESTW/KORTESTB/KORTESTQ/KORTESTD--OR Masks And Set Flags. + + KORTESTW + k1,k2 + VEX.L0.0F.W0 98 /r + + AVX512F + + Bitwise OR 16 bits masks k1 and k2 and update ZF and CF accordingly. + + + KORTESTB + k1,k2 + VEX.L0.66.0F.W0 98 /r + + AVX512DQ + + Bitwise OR 8 bits masks k1 and k2 and update ZF and CF accordingly. + + + KORTESTQ + k1,k2 + VEX.L0.0F.W1 98 /r + + AVX512BW + + Bitwise OR 64 bits masks k1 and k2 and update ZF and CF accordingly. + + + KORTESTD + k1,k2 + VEX.L0.66.0F.W1 98 /r + + AVX512BW + + Bitwise OR 32 bits masks k1 and k2 and update ZF and CF accordingly. + + + ModRM:reg(w) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + NA + + + + KSHIFTLW/KSHIFTLB/KSHIFTLQ/KSHIFTLD--Shift Left Mask Registers. + + KSHIFTLW + k1,k2,imm8 + VEX.L0.66.0F3A.W1 32 /r + + AVX512F + + Shift left 16 bits in k2 by immediate and write result in k1. + + + KSHIFTLB + k1,k2,imm8 + VEX.L0.66.0F3A.W0 32 /r + + AVX512DQ + + Shift left 8 bits in k2 by immediate and write result in k1. + + + KSHIFTLQ + k1,k2,imm8 + VEX.L0.66.0F3A.W1 33 /r + + AVX512BW + + Shift left 64 bits in k2 by immediate and write result in k1. + + + KSHIFTLD + k1,k2,imm8 + VEX.L0.66.0F3A.W0 33 /r + + AVX512BW + + Shift left 32 bits in k2 by immediate and write result in k1. 
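The K* instructions above treat the opmask registers as plain bit strings. A compact Python sketch of a few of the 16-bit (word) forms, including the flag behaviour of KORTESTW described above (ZF when the OR is all zeroes, CF when it is all ones); helper names are illustrative only.

MASK16 = 0xFFFF

def kandw(a, b):
    return a & b & MASK16

def knotw(a):
    return ~a & MASK16

def kshiftlw(a, imm8):
    return (a << imm8) & MASK16 if imm8 <= 15 else 0   # oversized counts give zero

def kortestw(a, b):
    """Returns (ZF, CF) for the 16-bit OR of two masks."""
    r = (a | b) & MASK16
    return (1 if r == 0 else 0, 1 if r == MASK16 else 0)

print(hex(kandw(0x00FF, 0x0F0F)))    # 0xf
print(hex(knotw(0x00FF)))            # 0xff00
print(hex(kshiftlw(0x0001, 4)))      # 0x10
print(kortestw(0xFF00, 0x00FF))      # (0, 1) -- OR is all ones, so CF is set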
+ + + ModRM:reg(w) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + Imm8 + NA + + + + KSHIFTRW/KSHIFTRB/KSHIFTRQ/KSHIFTRD--Shift Right Mask Registers. + + KSHIFTRW + k1,k2,imm8 + VEX.L0.66.0F3A.W1 30 /r + + AVX512F + + Shift right 16 bits in k2 by immediate and write result in k1. + + + KSHIFTRB + k1,k2,imm8 + VEX.L0.66.0F3A.W0 30 /r + + AVX512DQ + + Shift right 8 bits in k2 by immediate and write result in k1. + + + KSHIFTRQ + k1,k2,imm8 + VEX.L0.66.0F3A.W1 31 /r + + AVX512BW + + Shift right 64 bits in k2 by immediate and write result in k1. + + + KSHIFTRD + k1,k2,imm8 + VEX.L0.66.0F3A.W0 31 /r + + AVX512BW + + Shift right 32 bits in k2 by immediate and write result in k1. + + + ModRM:reg(w) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + Imm8 + NA + + + + KXNORW/KXNORB/KXNORQ/KXNORD--Bitwise Logical XNOR Masks. + + KXNORW + k1,k2,k3 + VEX.NDS.L1.0F.W0 46 /r + + AVX512F + + Bitwise XNOR 16 bits masks k2 and k3 and place result in k1. + + + KXNORB + k1,k2,k3 + VEX.L1.66.0F.W0 46 /r + + AVX512DQ + + Bitwise XNOR 8 bits masks k2 and k3 and place result in k1. + + + KXNORQ + k1,k2,k3 + VEX.L1.0F.W1 46 /r + + AVX512BW + + Bitwise XNOR 64 bits masks k2 and k3 and place result in k1. + + + KXNORD + k1,k2,k3 + VEX.L1.66.0F.W1 46 /r + + AVX512BW + + Bitwise XNOR 32 bits masks k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KTESTW/KTESTB/KTESTQ/KTESTD--Packed Bit Test Masks and Set Flags. + + RR + KTESTW k1,k2 + VEX.L0.0F.W0 99 /r + + AVX512DQ + + Set ZF and CF depending on sign bit AND and ANDN of 16 bits mask register sources. + + + RR + KTESTB k1,k2 + VEX.L0.66.0F.W0 99 /r + + AVX512DQ + + Set ZF and CF depending on sign bit AND and ANDN of 8 bits mask register sources. + + + RR + KTESTQ k1,k2 + VEX.L0.0F.W1 99 /r + + AVX512BW + + Set ZF and CF depending on sign bit AND and ANDN of 64 bits mask register sources. + + + RR + KTESTD k1,k2 + VEX.L0.66.0F.W1 99 /r + + AVX512BW + + Set ZF and CF depending on sign bit AND and ANDN of 32 bits mask register sources. + + + ModRM:reg(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + NA + + + + KXORW/KXORB/KXORQ/KXORD--Bitwise Logical XOR Masks. + + KXORW + k1,k2,k3 + VEX.NDS.L1.0F.W0 47 /r + + AVX512F + + Bitwise XOR 16 bits masks k2 and k3 and place result in k1. + + + KXORB + k1,k2,k3 + VEX.L1.66.0F.W0 47 /r + + AVX512DQ + + Bitwise XOR 8 bits masks k2 and k3 and place result in k1. + + + KXORQ + k1,k2,k3 + VEX.L1.0F.W1 47 /r + + AVX512BW + + Bitwise XOR 64 bits masks k2 and k3 and place result in k1. + + + KXORD + k1,k2,k3 + VEX.L1.66.0F.W1 47 /r + + AVX512BW + + Bitwise XOR 32 bits masks k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + VEXP2PD--Approximation to the Exponential 2^x of Packed Double-Precision Floating-Point Values with Less Than 2^-23 Relative Error. + + VEXP2PD + zmm1 {k1}{z},zmm2/m512/m64bcst {sae} + EVEX.512.66.0F38.W1 C8 /r + + AVX512ER + + Computes approximations to the exponential 2^x (with less than 2^-23 of maximum relative error) of the packed doubleprecision floating-point values from zmm2/m512/m64bcst and stores the floating-point result in zmm1with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + VEXP2PS--Approximation to the Exponential 2^x of Packed Single-Precision Floating-Point Values with Less Than 2^-23 Relative Error. 
+ + VEXP2PS + zmm1 {k1}{z},zmm2/m512/m32bcst {sae} + EVEX.512.66.0F38.W0 C8 /r + + AVX512ER + + Computes approximations to the exponential 2^x (with less than 2^-23 of maximum relative error) of the packed singleprecision floating-point values from zmm2/m512/m32bcst and stores the floating-point result in zmm1with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + VRCP28PD--Approximation to the Reciprocal of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error. + + VRCP28PD + zmm1 {k1}{z},zmm2/m512/m64bcst {sae} + EVEX.512.66.0F38.W1 CA /r + + AVX512ER + + Computes the approximate reciprocals ( < 2^-28 relative error) of the packed double-precision floating-point values in zmm2/m512/m64bcst and stores the results in zmm1. Under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRCP28SD--Approximation to the Reciprocal of Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error. + + VRCP28SD + xmm1 {k1}{z},xmm2,xmm3/m64 {sae} + EVEX.NDS.LIG.66.0F38.W1 CB /r + + AVX512ER + + Computes the approximate reciprocal ( < 2^-28 relative error) of the scalar double-precision floating-point value in xmm3/m64 and stores the results in xmm1. Under writemask. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VRCP28PS--Approximation to the Reciprocal of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error. + + VRCP28PS + zmm1 {k1}{z},zmm2/m512/m32bcst {sae} + EVEX.512.66.0F38.W0 CA /r + + AVX512ER + + Computes the approximate reciprocals ( < 2^-28 relative error) of the packed single-precision floating-point values in zmm2/m512/m32bcst and stores the results in zmm1. Under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRCP28SS--Approximation to the Reciprocal of Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error. + + VRCP28SS + xmm1 {k1}{z},xmm2,xmm3/m32 {sae} + EVEX.NDS.LIG.66.0F38.W0 CB /r + + AVX512ER + + Computes the approximate reciprocal ( < 2^-28 relative error) of the scalar single-precision floating-point value in xmm3/m32 and stores the results in xmm1. Under writemask. Also, upper 3 single-precision floating-point values (bits[127:32]) from xmm2 is copied to xmm1[127:32]. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VRSQRT28PD--Approximation to the Reciprocal Square Root of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error. + + VRSQRT28PD + zmm1 {k1}{z},zmm2/m512/m64bcst {sae} + EVEX.512.66.0F38.W1 CC /r + + AVX512ER + + Computes approximations to the Reciprocal square root (<2^28 relative error) of the packed double-precision floating-point values from zmm2/m512/m64bcst and stores result in zmm1with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRSQRT28SD--Approximation to the Reciprocal Square Root of Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error. + + VRSQRT28SD + xmm1 {k1}{z},xmm2,xmm3/m64 {sae} + EVEX.NDS.LIG.66.0F38.W1 CD /r + + AVX512ER + + Computes approximate reciprocal square root (<2^-28 relative error) of the scalar double-precision floating-point value from xmm3/m64 and stores result in xmm1with writemask k1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. 
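The VRCP28/VRSQRT28 and VEXP2 entries above promise results within a stated maximum relative error (2^-28, or 2^-23 for VEXP2). A tiny helper that expresses what such a bound means, which may be handy when checking a software model against a reference value; it is a generic check, not the instructions' actual algorithm.

def within_relative_error(approx, exact, max_rel_err):
    """True if |approx - exact| <= max_rel_err * |exact|."""
    return abs(approx - exact) <= max_rel_err * abs(exact)

# e.g. a value claiming to be the RCP28 approximation of 1/3 must sit within 2**-28 of it
exact = 1.0 / 3.0
print(within_relative_error(0.3333333330228925, exact, 2.0 ** -28))  # True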
+ + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRSQRT28PS--Approximation to the Reciprocal Square Root of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error. + + VRSQRT28PS + zmm1 {k1}{z},zmm2/m512/m32bcst {sae} + EVEX.512.66.0F38.W0 CC /r + + AVX512ER + + Computes approximations to the Reciprocal square root (<2^-28 relative error) of the packed single-precision floating-point values from zmm2/m512/m32bcst and stores result in zmm1with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRSQRT28SS--Approximation to the Reciprocal Square Root of Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error. + + VRSQRT28SS + xmm1 {k1}{z},xmm2,xmm3/m32 {sae} + EVEX.NDS.LIG.66.0F38.W0 CD /r + + AVX512ER + + Computes approximate reciprocal square root (<2^-28 relative error) of the scalar single-precision floating-point value from xmm3/m32 and stores result in xmm1with writemask k1. Also, upper 3 single-precision floating-point value (bits[127:32]) from xmm2 is copied to xmm1[127:32]. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VGATHERPF0DPS/VGATHERPF0QPS/VGATHERPF0DPD/VGATHERPF0QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T0 Hint. + + VGATHERPF0DPS + vm32z {k1} + EVEX.512.66.0F38.W0 C6 /1 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T0 hint. + + + VGATHERPF0QPS + vm64z {k1} + EVEX.512.66.0F38.W0 C7 /1 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T0 hint. + + + VGATHERPF0DPD + vm32y {k1} + EVEX.512.66.0F38.W1 C6 /1 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T0 hint. + + + VGATHERPF0QPD + vm64z {k1} + EVEX.512.66.0F38.W1 C7 /1 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T0 hint. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + NA + + + + VGATHERPF1DPS/VGATHERPF1QPS/VGATHERPF1DPD/VGATHERPF1QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T1 Hint. + + VGATHERPF1DPS + vm32z {k1} + EVEX.512.66.0F38.W0 C6 /2 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T1 hint. + + + VGATHERPF1QPS + vm64z {k1} + EVEX.512.66.0F38.W0 C7 /2 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T1 hint. + + + VGATHERPF1DPD + vm32y {k1} + EVEX.512.66.0F38.W1 C6 /2 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T1 hint. + + + VGATHERPF1QPD + vm64z {k1} + EVEX.512.66.0F38.W1 C7 /2 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T1 hint. + + + BaseReg(R): VSIB:base,VectorReg(R):VSIB:index + NA + NA + NA + + + + VSCATTERPF0DPS/VSCATTERPF0QPS/VSCATTERPF0DPD/VSCATTERPF0QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T0 Hint with Intent to Write. 
+ + VSCATTERPF0DPS + vm32z {k1} + EVEX.512.66.0F38.W0 C6 /5 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T0 hint with intent to write. + + + VSCATTERPF0QPS + vm64z {k1} + EVEX.512.66.0F38.W0 C7 /5 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T0 hint with intent to write. + + + VSCATTERPF0DPD + vm32y {k1} + EVEX.512.66.0F38.W1 C6 /5 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T0 hint with intent to write. + + + VSCATTERPF0QPD + vm64z {k1} + EVEX.512.66.0F38.W1 C7 /5 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T0 hint with intent to write. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + NA + + + + VSCATTERPF1DPS/VSCATTERPF1QPS/VSCATTERPF1DPD/VSCATTERPF1QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T1 Hint with Intent to Write. + + VSCATTERPF1DPS + vm32z {k1} + EVEX.512.66.0F38.W0 C6 /6 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T1 hint with intent to write. + + + VSCATTERPF1QPS + vm64z {k1} + EVEX.512.66.0F38.W0 C7 /6 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T1 hint with intent to write. + + + VSCATTERPF1DPD + vm32y {k1} + EVEX.512.66.0F38.W1 C6 /6 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T1 hint with intent to write. + + + VSCATTERPF1QPD + vm64z {k1} + EVEX.512.66.0F38.W1 C7 /6 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T1 hint with intent to write. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + NA + + + + SHA1RNDS4--Perform Four Rounds of SHA1 Operation. + + SHA1RNDS4 + xmm1,xmm2/m128,imm8 + 0F 3A CC /r ib + + SHA + + Performs four rounds of SHA1 operation operating on SHA1 state (A,B,C,D) from xmm1, with a pre-computed sum of the next 4 round message dwords and state variable E from xmm2/m128. The immediate byte controls logic functions and round constants. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + + SHA1NEXTE--Calculate SHA1 State Variable E after Four Rounds. + + SHA1NEXTE + xmm1,xmm2/m128 + 0F 38 C8 /r + + SHA + + Calculates SHA1 state variable E after four rounds of operation from the current SHA1 state variable A in xmm1. The calculated value of the SHA1 state variable E is added to the scheduled dwords in xmm2/m128, and stored with some of the scheduled dwords in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + SHA1MSG1--Perform an Intermediate Calculation for the Next Four SHA1 Message Dwords. + + SHA1MSG1 + xmm1,xmm2/m128 + 0F 38 C9 /r + + SHA + + Performs an intermediate calculation for the next four SHA1 message dwords using previous message dwords from xmm1 and xmm2/m128, storing the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + SHA1MSG2--Perform a Final Calculation for the Next Four SHA1 Message Dwords. 
+ + SHA1MSG2 + xmm1,xmm2/m128 + 0F 38 CA /r + + SHA + + Performs the final calculation for the next four SHA1 message dwords using intermediate results from xmm1 and the previous message dwords from xmm2/m128, storing the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + SHA256RNDS2--Perform Two Rounds of SHA256 Operation. + + SHA256RNDS2 + xmm1,xmm2/m128,<XMM0> + 0F 38 CB /r + + SHA + + Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from xmm1, an initial SHA256 state (A,B,E,F) from xmm2/m128, and a pre-computed sum of the next 2 round message dwords and the corresponding round constants from the implicit operand XMM0, storing the updated SHA256 state (A,B,E,F) result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Implicit XMM0(r) + NA + + + + SHA256MSG1--Perform an Intermediate Calculation for the Next Four SHA256 Message Dwords. + + SHA256MSG1 + xmm1,xmm2/m128 + 0F 38 CC /r + + SHA + + Performs an intermediate calculation for the next four SHA256 message dwords using previous message dwords from xmm1 and xmm2/m128, storing the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + SHA256MSG2--Perform a Final Calculation for the Next Four SHA256 Message Dwords. + + SHA256MSG2 + xmm1,xmm2/m128 + 0F 38 CD /r + + SHA + + Performs the final calculation for the next four SHA256 message dwords using previous message dwords from xmm1 and xmm2/m128, storing the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + BNDMK--Make Bounds. + + BNDMK + bnd,m32 + F3 0F 1B /r + + MPX + + Make lower and upper bounds from m32 and store them in bnd. + + + BNDMK + bnd,m64 + F3 0F 1B /r + + MPX + + Make lower and upper bounds from m64 and store them in bnd. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + BNDCL--Check Lower Bound. + + BNDCL + bnd,r/m32 + F3 0F 1A /r + + MPX + + Generate a #BR if the address in r/m32 is lower than the lower bound in bnd.LB. + + + BNDCL + bnd,r/m64 + F3 0F 1A /r + + MPX + + Generate a #BR if the address in r/m64 is lower than the lower bound in bnd.LB. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + BNDCU/BNDCN--Check Upper Bound. + + BNDCU + bnd,r/m32 + F2 0F 1A /r + + MPX + + Generate a #BR if the address in r/m32 is higher than the upper bound in bnd.UB (bnb.UB in 1's complement form). + + + BNDCU + bnd,r/m64 + F2 0F 1A /r + + MPX + + Generate a #BR if the address in r/m64 is higher than the upper bound in bnd.UB (bnb.UB in 1's complement form). + + + BNDCN + bnd,r/m32 + F2 0F 1B /r + + MPX + + Generate a #BR if the address in r/m32 is higher than the upper bound in bnd.UB (bnb.UB not in 1's complement form). + + + BNDCN + bnd,r/m64 + F2 0F 1B /r + + MPX + + Generate a #BR if the address in r/m64 is higher than the upper bound in bnd.UB (bnb.UB not in 1's complement form). + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + BNDMOV--Move Bounds. + + BNDMOV + bnd1,bnd2/m64 + 66 0F 1A /r + + MPX + + Move lower and upper bound from bnd2/m64 to bound register bnd1. + + + BNDMOV + bnd1,bnd2/m128 + 66 0F 1A /r + + MPX + + Move lower and upper bound from bnd2/m128 to bound register bnd1. + + + BNDMOV + bnd1/m64,bnd2 + 66 0F 1B /r + + MPX + + Move lower and upper bound from bnd2 to bnd1/m64. + + + BNDMOV + bnd1/m128,bnd2 + 66 0F 1B /r + + MPX + + Move lower and upper bound from bnd2 to bound register bnd1/m128. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + BNDLDX--Load Extended Bounds Using Address Translation. 
+ + BNDLDX + bnd,mib + 0F 1A /r + + MPX + + Load the bounds stored in a bound table entry (BTE) into bnd with address translation using the base of mib and conditional on the index of mib matching the pointer value in the BTE. + + + ModRM:reg(w) + SIB.base(r): Address of pointer,SIB.index(r) + NA + NA + + + + BNDSTX--Store Extended Bounds Using Address Translation. + + BNDSTX + mib,bnd + 0F 1B /r + + MPX + + Store the bounds in bnd and the pointer value in the index register of mib to a bound table entry (BTE) with address translation using the base of mib. + + + SIB.base(r): Address of pointer,SIB.index(r) + ModRM:reg(r) + NA + NA + + + + CLFLUSHOPT--Flush a Cache Line Optimized. + + CLFLUSHOPT + m8 + 66 0F AE /7 + + CLFLUSHOPT + + Flushes cache line containing m8. + + + ModRM:r/m(w) + NA + NA + NA + + + + CLWB--Cache Line Write Back. + + CLWB + m8 + 66 0F AE /6 + + CLWB + + Writes back modified cache line containing m8, and may retain the line in cache hierarchy in non-modified state. + + + ModRM:r/m(w) + NA + NA + NA + + + + PCOMMIT--Persistent Commit. + + PCOMMIT + void + 66 0F AE F8 + + PCOMMIT + + Commits stores to persistent memory. + + + NA + NA + NA + NA + + + \ No newline at end of file diff --git a/xml/raw/x86/Intel/AVX512_r24.xml b/xml/raw/x86/Intel/AVX512_r24.xml new file mode 100644 index 0000000..5ac8038 --- /dev/null +++ b/xml/raw/x86/Intel/AVX512_r24.xml @@ -0,0 +1,25739 @@ + + + + + + + + + ADDPD--Add Packed Double-Precision Floating-Point Values. + + ADDPD + xmm1,xmm2/m128 + 66 0F 58 /r + + SSE2 + + Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in xmm1. + + + VADDPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 58 /r + + AVX + + Add packed double-precision floating-point values from xmm3/mem to xmm2 and store result in xmm1. + + + VADDPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 58 /r + + AVX + + Add packed double-precision floating-point values from ymm3/mem to ymm2 and store result in ymm1. + + + VADDPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 58 /r + + AVX512VL + AVX512F + + Add packed double-precision floating-point values from xmm3/m128/m64bcst to xmm2 and store result in xmm1 with writemask k1. + + + VADDPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 58 /r + + AVX512VL + AVX512F + + Add packed double-precision floating-point values from ymm3/m256/m64bcst to ymm2 and store result in ymm1 with writemask k1. + + + VADDPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F.W1 58 /r + + AVX512F + + Add packed double-precision floating-point values from zmm3/m512/m64bcst to zmm2 and store result in zmm1 with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ADDPS--Add Packed Single-Precision Floating-Point Values. + + ADDPS + xmm1,xmm2/m128 + 0F 58 /r + + SSE + + Add packed single-precision floating-point values from xmm2/m128 to xmm1 and store result in xmm1. + + + VADDPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 58 /r + + AVX + + Add packed single-precision floating-point values from xmm3/m128 to xmm2 and store result in xmm1. + + + VADDPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 58 /r + + AVX + + Add packed single-precision floating-point values from ymm3/m256 to ymm2 and store result in ymm1. 
+ + + VADDPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 58 /r + + AVX512VL + AVX512F + + Add packed single-precision floating-point values from xmm3/m128/m32bcst to xmm2 and store result in xmm1 with writemask k1. + + + VADDPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 58 /r + + AVX512VL + AVX512F + + Add packed single-precision floating-point values from ymm3/m256/m32bcst to ymm2 and store result in ymm1 with writemask k1. + + + VADDPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst {er} + EVEX.NDS.512.0F.W0 58 /r + + AVX512F + + Add packed single-precision floating-point values from zmm3/m512/m32bcst to zmm2 and store result in zmm1 with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ADDSD--Add Scalar Double-Precision Floating-Point Values. + + ADDSD + xmm1,xmm2/m64 + F2 0F 58 /r + + SSE2 + + Add the low double-precision floating-point value from xmm2/mem to xmm1 and store the result in xmm1. + + + VADDSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 58 /r + + AVX + + Add the low double-precision floating-point value from xmm3/mem to xmm2 and store the result in xmm1. + + + VADDSD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.F2.0F.W1 58 /r + + AVX512F + + Add the low double-precision floating-point value from xmm3/m64 to xmm2 and store the result in xmm1 with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ADDSS--Add Scalar Single-Precision Floating-Point Values. + + ADDSS + xmm1,xmm2/m32 + F3 0F 58 /r + + SSE + + Add the low single-precision floating-point value from xmm2/mem to xmm1 and store the result in xmm1. + + + VADDSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 58 /r + + AVX + + Add the low single-precision floating-point value from xmm3/mem to xmm2 and store the result in xmm1. + + + VADDSS + xmm1{k1}{z},xmm2,xmm3/m32{er} + EVEX.NDS.LIG.F3.0F.W0 58 /r + + AVX512F + + Add the low single-precision floating-point value from xmm3/m32 to xmm2 and store the result in xmm1with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VALIGND/VALIGNQ--Align Doubleword/Quadword Vectors. + + VALIGND + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.66.0F3A.W0 03 /r ib + + AVX512VL + AVX512F + + Shift right and merge vectors xmm2 and xmm3/m128/m32bcst with double-word granularity using imm8 as number of elements to shift, and store the final result in xmm1, under writemask. + + + VALIGNQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F3A.W1 03 /r ib + + AVX512VL + AVX512F + + Shift right and merge vectors xmm2 and xmm3/m128/m64bcst with quad-word granularity using imm8 as number of elements to shift, and store the final result in xmm1, under writemask. + + + VALIGND + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 03 /r ib + + AVX512VL + AVX512F + + Shift right and merge vectors ymm2 and ymm3/m256/m32bcst with double-word granularity using imm8 as number of elements to shift, and store the final result in ymm1, under writemask. 
+ + + VALIGNQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 03 /r ib + + AVX512VL + AVX512F + + Shift right and merge vectors ymm2 and ymm3/m256/m64bcst with quad-word granularity using imm8 as number of elements to shift, and store the final result in ymm1, under writemask. + + + VALIGND + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.66.0F3A.W0 03 /r ib + + AVX512F + + Shift right and merge vectors zmm2 and zmm3/m512/m32bcst with double-word granularity using imm8 as number of elements to shift, and store the final result in zmm1, under writemask. + + + VALIGNQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F3A.W1 03 /r ib + + AVX512F + + Shift right and merge vectors zmm2 and zmm3/m512/m64bcst with quad-word granularity using imm8 as number of elements to shift, and store the final result in zmm1, under writemask. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VBLENDMPD/VBLENDMPS--Blend Float64/Float32 Vectors Using an OpMask Control. + + VBLENDMPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 65 /r + + AVX512VL + AVX512F + + Blend double-precision vector xmm2 and double-precision vector xmm3/m128/m64bcst and store the result in xmm1, under control mask. + + + VBLENDMPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 65 /r + + AVX512VL + AVX512F + + Blend double-precision vector ymm2 and double-precision vector ymm3/m256/m64bcst and store the result in ymm1, under control mask. + + + VBLENDMPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 65 /r + + AVX512F + + Blend double-precision vector zmm2 and double-precision vector zmm3/m512/m64bcst and store the result in zmm1, under control mask. + + + VBLENDMPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 65 /r + + AVX512VL + AVX512F + + Blend single-precision vector xmm2 and single-precision vector xmm3/m128/m32bcst and store the result in xmm1, under control mask. + + + VBLENDMPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 65 /r + + AVX512VL + AVX512F + + Blend single-precision vector ymm2 and single-precision vector ymm3/m256/m32bcst and store the result in ymm1, under control mask. + + + VBLENDMPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 65 /r + + AVX512F + + Blend single-precision vector zmm2 and single-precision vector zmm3/m512/m32bcst using k1 as select control and store the result in zmm1. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VPBLENDMB/VPBLENDMW--Blend Byte/Word Vectors Using an Opmask Control. + + VPBLENDMB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W0 66 /r + + AVX512VL + AVX512BW + + Blend byte integer vector xmm2 and byte vector xmm3/m128 and store the result in xmm1, under control mask. + + + VPBLENDMB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W0 66 /r + + AVX512VL + AVX512BW + + Blend byte integer vector ymm2 and byte vector ymm3/m256 and store the result in ymm1, under control mask. + + + VPBLENDMB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W0 66 /r + + AVX512BW + + Blend byte integer vector zmm2 and byte vector zmm3/m512 and store the result in zmm1, under control mask. + + + VPBLENDMW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 66 /r + + AVX512VL + AVX512BW + + Blend word integer vector xmm2 and word vector xmm3/m128 and store the result in xmm1, under control mask. 
+ + + VPBLENDMW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 66 /r + + AVX512VL + AVX512BW + + Blend word integer vector ymm2 and word vector ymm3/m256 and store the result in ymm1, under control mask. + + + VPBLENDMW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 66 /r + + AVX512BW + + Blend word integer vector zmm2 and word vector zmm3/m512 and store the result in zmm1, under control mask. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VPBLENDMD/VPBLENDMQ--Blend Int32/Int64 Vectors Using an OpMask Control. + + VPBLENDMD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 64 /r + + AVX512VL + AVX512F + + Blend doubleword integer vector xmm2 and doubleword vector xmm3/m128/m32bcst and store the result in xmm1, under control mask. + + + VPBLENDMD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 64 /r + + AVX512VL + AVX512F + + Blend doubleword integer vector ymm2 and doubleword vector ymm3/m256/m32bcst and store the result in ymm1, under control mask. + + + VPBLENDMD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 64 /r + + AVX512F + + Blend doubleword integer vector zmm2 and doubleword vector zmm3/m512/m32bcst and store the result in zmm1, under control mask. + + + VPBLENDMQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 64 /r + + AVX512VL + AVX512F + + Blend quadword integer vector xmm2 and quadword vector xmm3/m128/m64bcst and store the result in xmm1, under control mask. + + + VPBLENDMQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 64 /r + + AVX512VL + AVX512F + + Blend quadword integer vector ymm2 and quadword vector ymm3/m256/m64bcst and store the result in ymm1, under control mask. + + + VPBLENDMQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 64 /r + + AVX512F + + Blend quadword integer vector zmm2 and quadword vector zmm3/m512/m64bcst and store the result in zmm1, under control mask. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ANDPD--Bitwise Logical AND of Packed Double Precision Floating-Point Values. + + ANDPD + xmm1,xmm2/m128 + 66 0F 54 /r + + SSE2 + + Return the bitwise logical AND of packed double-precision floating-point values in xmm1 and xmm2/mem. + + + VANDPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 54 /r + + AVX + + Return the bitwise logical AND of packed double-precision floating-point values in xmm2 and xmm3/mem. + + + VANDPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 54 /r + + AVX + + Return the bitwise logical AND of packed double-precision floating-point values in ymm2 and ymm3/mem. + + + VANDPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 54 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed double-precision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1. + + + VANDPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 54 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed double-precision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1. + + + VANDPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 54 /r + + AVX512DQ + + Return the bitwise logical AND of packed double-precision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ANDPS--Bitwise Logical AND of Packed Single Precision Floating-Point Values. 
+ + ANDPS + xmm1,xmm2/m128 + 0F 54 /r + + SSE + + Return the bitwise logical AND of packed single-precision floating-point values in xmm1 and xmm2/mem. + + + VANDPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F 54 /r + + AVX + + Return the bitwise logical AND of packed single-precision floating-point values in xmm2 and xmm3/mem. + + + VANDPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F 54 /r + + AVX + + Return the bitwise logical AND of packed single-precision floating-point values in ymm2 and ymm3/mem. + + + VANDPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 54 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1. + + + VANDPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 54 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1. + + + VANDPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 54 /r + + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ANDNPD--Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values. + + ANDNPD + xmm1,xmm2/m128 + 66 0F 55 /r + + SSE2 + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in xmm1 and xmm2/mem. + + + VANDNPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 55 /r + + AVX + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in xmm2 and xmm3/mem. + + + VANDNPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 55/r + + AVX + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in ymm2 and ymm3/mem. + + + VANDNPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 55 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1. + + + VANDNPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 55 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1. + + + VANDNPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 55 /r + + AVX512DQ + + Return the bitwise logical AND NOT of packed doubleprecision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + ANDNPS--Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values. + + ANDNPS + xmm1,xmm2/m128 + 0F 55 /r + + SSE + + Return the bitwise logical AND NOT of packed single-precision floating-point values in xmm1 and xmm2/mem. + + + VANDNPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F 55 /r + + AVX + + Return the bitwise logical AND NOT of packed single-precision floating-point values in xmm2 and xmm3/mem. + + + VANDNPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F 55 /r + + AVX + + Return the bitwise logical AND NOT of packed single-precision floating-point values in ymm2 and ymm3/mem. 
+ + + VANDNPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 55 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1. + + + VANDNPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 55 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1. + + + VANDNPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 55 /r + + AVX512DQ + + Return the bitwise logical AND of packed single-precision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VBROADCAST--Load with Broadcast Floating-Point Data. + + VBROADCASTSS + xmm1,m32 + VEX.128.66.0F38.W0 18 /r + + AVX + + Broadcast single-precision floating-point element in mem to four locations in xmm1. + + + VBROADCASTSS + ymm1,m32 + VEX.256.66.0F38.W0 18 /r + + AVX + + Broadcast single-precision floating-point element in mem to eight locations in ymm1. + + + VBROADCASTSD + ymm1,m64 + VEX.256.66.0F38.W0 19 /r + + AVX + + Broadcast double-precision floating-point element in mem to four locations in ymm1. + + + VBROADCASTF128 + ymm1,m128 + VEX.256.66.0F38.W0 1A /r + + AVX + + Broadcast 128 bits of floating-point data in mem to low and high 128-bits in ymm1. + + + VBROADCASTSD + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.W1 19 /r + + AVX512VL + AVX512F + + Broadcast low double-precision floating-point element in xmm2/m64 to four locations in ymm1 using writemask k1. + + + VBROADCASTSD + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.W1 19 /r + + AVX512F + + Broadcast low double-precision floating-point element in xmm2/m64 to eight locations in zmm1 using writemask k1. + + + VBROADCASTF32X2 + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.W0 19 /r + + AVX512VL + AVX512DQ + + Broadcast two single-precision floating-point elements in xmm2/m64 to locations in ymm1 using writemask k1. + + + VBROADCASTF32X2 + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.W0 19 /r + + AVX512DQ + + Broadcast two single-precision floating-point elements in xmm2/m64 to locations in zmm1 using writemask k1. + + + VBROADCASTSS + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.W0 18 /r + + AVX512VL + AVX512F + + Broadcast low single-precision floating-point element in xmm2/m32 to all locations in xmm1 using writemask k1. + + + VBROADCASTSS + ymm1 {k1}{z},xmm2/m32 + EVEX.256.66.0F38.W0 18 /r + + AVX512VL + AVX512F + + Broadcast low single-precision floating-point element in xmm2/m32 to all locations in ymm1 using writemask k1. + + + VBROADCASTSS + zmm1 {k1}{z},xmm2/m32 + EVEX.512.66.0F38.W0 18 /r + + AVX512F + + Broadcast low single-precision floating-point element in xmm2/m32 to all locations in zmm1 using writemask k1. + + + VBROADCASTF32X4 + ymm1 {k1}{z},m128 + EVEX.256.66.0F38.W0 1A /r + + AVX512VL + AVX512F + + Broadcast 128 bits of 4 single-precision floating-point data in mem to locations in ymm1 using writemask k1. + + + VBROADCASTF32X4 + zmm1 {k1}{z},m128 + EVEX.512.66.0F38.W0 1A /r + + AVX512F + + Broadcast 128 bits of 4 single-precision floating-point data in mem to locations in zmm1 using writemask k1. 
+ + + VBROADCASTF64X2 + ymm1 {k1}{z},m128 + EVEX.256.66.0F38.W1 1A /r + + AVX512VL + AVX512DQ + + Broadcast 128 bits of 2 double-precision floating-point data in mem to locations in ymm1 using writemask k1. + + + VBROADCASTF64X2 + zmm1 {k1}{z},m128 + EVEX.512.66.0F38.W1 1A /r + + AVX512DQ + + Broadcast 128 bits of 2 double-precision floating-point data in mem to locations in zmm1 using writemask k1. + + + VBROADCASTF32X8 + zmm1 {k1}{z},m256 + EVEX.512.66.0F38.W0 1B /r + + AVX512DQ + + Broadcast 256 bits of 8 single-precision floating-point data in mem to locations in zmm1 using writemask k1. + + + VBROADCASTF64X4 + zmm1 {k1}{z},m256 + EVEX.512.66.0F38.W1 1B /r + + AVX512F + + Broadcast 256 bits of 4 double-precision floating-point data in mem to locations in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + + VPBROADCASTB/W/D/Q--Load with Broadcast Integer Data from General Purpose Register. + + VPBROADCASTB + xmm1 {k1}{z},reg + EVEX.128.66.0F38.W0 7A /r + + AVX512VL + AVX512BW + + Broadcast an 8-bit value from a GPR to all bytes in the 128-bit destination subject to writemask k1. + + + VPBROADCASTB + ymm1 {k1}{z},reg + EVEX.256.66.0F38.W0 7A /r + + AVX512VL + AVX512BW + + Broadcast an 8-bit value from a GPR to all bytes in the 256-bit destination subject to writemask k1. + + + VPBROADCASTB + zmm1 {k1}{z},reg + EVEX.512.66.0F38.W0 7A /r + + AVX512BW + + Broadcast an 8-bit value from a GPR to all bytes in the 512-bit destination subject to writemask k1. + + + VPBROADCASTW + xmm1 {k1}{z},reg + EVEX.128.66.0F38.W0 7B /r + + AVX512VL + AVX512BW + + Broadcast a 16-bit value from a GPR to all words in the 128-bit destination subject to writemask k1. + + + VPBROADCASTW + ymm1 {k1}{z},reg + EVEX.256.66.0F38.W0 7B /r + + AVX512VL + AVX512BW + + Broadcast a 16-bit value from a GPR to all words in the 256-bit destination subject to writemask k1. + + + VPBROADCASTW + zmm1 {k1}{z},reg + EVEX.512.66.0F38.W0 7B /r + + AVX512BW + + Broadcast a 16-bit value from a GPR to all words in the 512-bit destination subject to writemask k1. + + + VPBROADCASTD + xmm1 {k1}{z},r32 + EVEX.128.66.0F38.W0 7C /r + + AVX512VL + AVX512F + + Broadcast a 32-bit value from a GPR to all double-words in the 128-bit destination subject to writemask k1. + + + VPBROADCASTD + ymm1 {k1}{z},r32 + EVEX.256.66.0F38.W0 7C /r + + AVX512VL + AVX512F + + Broadcast a 32-bit value from a GPR to all double-words in the 256-bit destination subject to writemask k1. + + + VPBROADCASTD + zmm1 {k1}{z},r32 + EVEX.512.66.0F38.W0 7C /r + + AVX512F + + Broadcast a 32-bit value from a GPR to all double-words in the 512-bit destination subject to writemask k1. + + + VPBROADCASTQ + xmm1 {k1}{z},r64 + EVEX.128.66.0F38.W1 7C /r + + AVX512VL + AVX512F + + Broadcast a 64-bit value from a GPR to all quad-words in the 128-bit destination subject to writemask k1. + + + VPBROADCASTQ + ymm1 {k1}{z},r64 + EVEX.256.66.0F38.W1 7C /r + + AVX512VL + AVX512F + + Broadcast a 64-bit value from a GPR to all quad-words in the 256-bit destination subject to writemask k1. + + + VPBROADCASTQ + zmm1 {k1}{z},r64 + EVEX.512.66.0F38.W1 7C /r + + AVX512F + + Broadcast a 64-bit value from a GPR to all quad-words in the 512-bit destination subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPBROADCAST--Load Integer and Broadcast. 
+ + VPBROADCASTB + xmm1,xmm2/m8 + VEX.128.66.0F38.W0 78 /r + + AVX2 + + Broadcast a byte integer in the source operand to sixteen locations in xmm1. + + + VPBROADCASTB + ymm1,xmm2/m8 + VEX.256.66.0F38.W0 78 /r + + AVX2 + + Broadcast a byte integer in the source operand to thirty-two locations in ymm1. + + + VPBROADCASTB + xmm1{k1}{z},xmm2/m8 + EVEX.128.66.0F38.W0 78 /r + + AVX512VL + AVX512BW + + Broadcast a byte integer in the source operand to locations in xmm1 subject to writemask k1. + + + VPBROADCASTB + ymm1{k1}{z},xmm2/m8 + EVEX.256.66.0F38.W0 78 /r + + AVX512VL + AVX512BW + + Broadcast a byte integer in the source operand to locations in ymm1 subject to writemask k1. + + + VPBROADCASTB + zmm1{k1}{z},xmm2/m8 + EVEX.512.66.0F38.W0 78 /r + + AVX512BW + + Broadcast a byte integer in the source operand to 64 locations in zmm1 subject to writemask k1. + + + VPBROADCASTW + xmm1,xmm2/m16 + VEX.128.66.0F38.W0 79 /r + + AVX2 + + Broadcast a word integer in the source operand to eight locations in xmm1. + + + VPBROADCASTW + ymm1,xmm2/m16 + VEX.256.66.0F38.W0 79 /r + + AVX2 + + Broadcast a word integer in the source operand to sixteen locations in ymm1. + + + VPBROADCASTW + xmm1{k1}{z},xmm2/m16 + EVEX.128.66.0F38.W0 79 /r + + AVX512VL + AVX512BW + + Broadcast a word integer in the source operand to locations in xmm1 subject to writemask k1. + + + VPBROADCASTW + ymm1{k1}{z},xmm2/m16 + EVEX.256.66.0F38.W0 79 /r + + AVX512VL + AVX512BW + + Broadcast a word integer in the source operand to locations in ymm1 subject to writemask k1. + + + VPBROADCASTW + zmm1{k1}{z},xmm2/m16 + EVEX.512.66.0F38.W0 79 /r + + AVX512BW + + Broadcast a word integer in the source operand to 32 locations in zmm1 subject to writemask k1. + + + VPBROADCASTD + xmm1,xmm2/m32 + VEX.128.66.0F38.W0 58 /r + + AVX2 + + Broadcast a dword integer in the source operand to four locations in xmm1. + + + VPBROADCASTD + ymm1,xmm2/m32 + VEX.256.66.0F38.W0 58 /r + + AVX2 + + Broadcast a dword integer in the source operand to eight locations in ymm1. + + + VPBROADCASTD + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.W0 58 /r + + AVX512VL + AVX512F + + Broadcast a dword integer in the source operand to locations in xmm1 subject to writemask k1. + + + VPBROADCASTD + ymm1 {k1}{z},xmm2/m32 + EVEX.256.66.0F38.W0 58 /r + + AVX512VL + AVX512F + + Broadcast a dword integer in the source operand to locations in ymm1 subject to writemask k1. + + + VPBROADCASTD + zmm1 {k1}{z},xmm2/m32 + EVEX.512.66.0F38.W0 58 /r + + AVX512F + + Broadcast a dword integer in the source operand to locations in zmm1 subject to writemask k1. + + + VPBROADCASTQ + xmm1,xmm2/m64 + VEX.128.66.0F38.W0 59 /r + + AVX2 + + Broadcast a qword element in source operand to two locations in xmm1. + + + VPBROADCASTQ + ymm1,xmm2/m64 + VEX.256.66.0F38.W0 59 /r + + AVX2 + + Broadcast a qword element in source operand to four locations in ymm1. + + + VPBROADCASTQ + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.W1 59 /r + + AVX512VL + AVX512F + + Broadcast a qword element in source operand to locations in xmm1 subject to writemask k1. + + + VPBROADCASTQ + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.W1 59 /r + + AVX512VL + AVX512F + + Broadcast a qword element in source operand to locations in ymm1 subject to writemask k1. + + + VPBROADCASTQ + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.W1 59 /r + + AVX512F + + Broadcast a qword element in source operand to locations in zmm1 subject to writemask k1. 
+ + + VBROADCASTI32x2 + xmm1 {k 1}{z},xmm2/m64 + EVEX.128.66.0F38.W0 59 /r + + AVX512VL + AVX512DQ + + Broadcast two dword elements in source operand to locations in xmm1 subject to writemask k1. + + + VBROADCASTI32x2 + ymm1 {k 1}{z},xmm2/m64 + EVEX.256.66.0F38.W0 59 /r + + AVX512VL + AVX512DQ + + Broadcast two dword elements in source operand to locations in ymm1 subject to writemask k1. + + + VBROADCASTI32x2 + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.W0 59 /r + + AVX512DQ + + Broadcast two dword elements in source operand to locations in zmm1 subject to writemask k1. + + + VBROADCASTI128 + ymm1,m128 + VEX.256.66.0F38.W0 5A /r + + AVX2 + + Broadcast 128 bits of integer data in mem to low and high 128-bits in ymm1. + + + VBROADCASTI32X4 + ymm1 {k1}{z},m128 + EVEX.256.66.0F38.W0 5A /r + + AVX512VL + AVX512F + + Broadcast 128 bits of 4 doubleword integer data in mem to locations in ymm1 using writemask k1. + + + VBROADCASTI32X4 + zmm1 {k1}{z},m128 + EVEX.512.66.0F38.W0 5A /r + + AVX512F + + Broadcast 128 bits of 4 doubleword integer data in mem to locations in zmm1 using writemask k1. + + + VBROADCASTI64X2 + ymm1 {k1}{z},m128 + EVEX.256.66.0F38.W1 5A /r + + AVX512VL + AVX512DQ + + Broadcast 128 bits of 2 quadword integer data in mem to locations in ymm1 using writemask k1. + + + VBROADCASTI64X2 + zmm1 {k1}{z},m128 + EVEX.512.66.0F38.W1 5A /r + + AVX512DQ + + Broadcast 128 bits of 2 quadword integer data in mem to locations in zmm1 using writemask k1. + + + VBROADCASTI32X8 + zmm1 {k1}{z},m256 + EVEX.512.66.0F38.W0 5B /r + + AVX512DQ + + Broadcast 256 bits of 8 doubleword integer data in mem to locations in zmm1 using writemask k1. + + + VBROADCASTI64X4 + zmm1 {k1}{z},m256 + EVEX.512.66.0F38.W1 5B /r + + AVX512F + + Broadcast 256 bits of 4 quadword integer data in mem to locations in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + reg(w) :ModRM:r/m(r) + NA + NA + NA + + + + CMPPD--Compare Packed Double-Precision Floating-Point Values. + + CMPPD + xmm1,xmm2/m128,imm8 + 66 0F C2 /r ib + + SSE2 + + Compare packed double-precision floating-point values in xmm2/m128 and xmm1 using bits 2:0 of imm8 as a comparison predicate. + + + VCMPPD + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F.WIG C2 /r ib + + AVX + + Compare packed double-precision floating-point values in xmm3/m128 and xmm2 using bits 4:0 of imm8 as a comparison predicate. + + + VCMPPD + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F.WIG C2 /r ib + + AVX + + Compare packed double-precision floating-point values in ymm3/m256 and ymm2 using bits 4:0 of imm8 as a comparison predicate. + + + VCMPPD + k1 {k2},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F.W1 C2 /r ib + + AVX512VL + AVX512F + + Compare packed double-precision floating-point values in xmm3/m128/m64bcst and xmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VCMPPD + k1 {k2},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F.W1 C2 /r ib + + AVX512VL + AVX512F + + Compare packed double-precision floating-point values in ymm3/m256/m64bcst and ymm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. 
+ + + VCMPPD + k1 {k2},zmm2,zmm3/m512/m64bcst{sae},imm8 + EVEX.NDS.512.66.0F.W1 C2 /r ib + + AVX512F + + Compare packed double-precision floating-point values in zmm3/m512/m64bcst and zmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + CMPPS--Compare Packed Single-Precision Floating-Point Values. + + CMPPS + xmm1,xmm2/m128,imm8 + 0F C2 /r ib + + SSE + + Compare packed single-precision floating-point values in xmm2/m128 and xmm1 using bits 2:0 of imm8 as a comparison predicate. + + + VCMPPS + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.0F.WIG C2 /r ib + + AVX + + Compare packed single-precision floating-point values in xmm3/m128 and xmm2 using bits 4:0 of imm8 as a comparison predicate. + + + VCMPPS + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.0F.WIG C2 /r ib + + AVX + + Compare packed single-precision floating-point values in ymm3/m256 and ymm2 using bits 4:0 of imm8 as a comparison predicate. + + + VCMPPS + k1 {k2},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.0F.W0 C2 /r ib + + AVX512VL + AVX512F + + Compare packed single-precision floating-point values in xmm3/m128/m32bcst and xmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VCMPPS + k1 {k2},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.0F.W0 C2 /r ib + + AVX512VL + AVX512F + + Compare packed single-precision floating-point values in ymm3/m256/m32bcst and ymm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VCMPPS + k1 {k2},zmm2,zmm3/m512/m32bcst{sae},imm8 + EVEX.NDS.512.0F.W0 C2 /r ib + + AVX512F + + Compare packed single-precision floating-point values in zmm3/m512/m32bcst and zmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + CMPSD--Compare Scalar Double-Precision Floating-Point Value. + + CMPSD + xmm1,xmm2/m64,imm8 + F2 0F C2 /r ib + + SSE2 + + Compare low double-precision floating-point value in xmm2/m64 and xmm1 using bits 2:0 of imm8 as comparison predicate. + + + VCMPSD + xmm1,xmm2,xmm3/m64,imm8 + VEX.NDS.128.F2.0F.WIG C2 /r ib + + AVX + + Compare low double-precision floating-point value in xmm3/m64 and xmm2 using bits 4:0 of imm8 as comparison predicate. + + + VCMPSD + k1 {k2},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.F2.0F.W1 C2 /r ib + + AVX512F + + Compare low double-precision floating-point value in xmm3/m64 and xmm2 using bits 4:0 of imm8 as comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + CMPSS--Compare Scalar Single-Precision Floating-Point Value. + + CMPSS + xmm1,xmm2/m32,imm8 + F3 0F C2 /r ib + + SSE + + Compare low single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of imm8 as comparison predicate. + + + VCMPSS + xmm1,xmm2,xmm3/m32,imm8 + VEX.NDS.128.F3.0F.WIG C2 /r ib + + AVX + + Compare low single-precision floating-point value in xmm3/m32 and xmm2 using bits 4:0 of imm8 as comparison predicate. 
+ + + VCMPSS + k1 {k2},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.F3.0F.W0 C2 /r ib + + AVX512F + + Compare low single-precision floating-point value in xmm3/m32 and xmm2 using bits 4:0 of imm8 as comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + COMISD--Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS. + + COMISD + xmm1,xmm2/m64 + 66 0F 2F /r + + SSE2 + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + VCOMISD + xmm1,xmm2/m64 + VEX.128.66.0F.WIG 2F /r + + AVX + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + VCOMISD + xmm1,xmm2/m64{sae} + EVEX.LIG.66.0F.W1 2F /r + + AVX512F + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + COMISS--Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS. + + COMISS + xmm1,xmm2/m32 + 0F 2F /r + + SSE + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + VCOMISS + xmm1,xmm2/m32 + VEX.128.0F.WIG 2F /r + + AVX + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + VCOMISS + xmm1,xmm2/m32{sae} + EVEX.LIG.0F.W0 2F /r + + AVX512F + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + DIVPD--Divide Packed Double-Precision Floating-Point Values. + + DIVPD + xmm1,xmm2/m128 + 66 0F 5E /r + + SSE2 + + Divide packed double-precision floating-point values in xmm1 by packed double-precision floating-point values in xmm2/mem. + + + VDIVPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5E /r + + AVX + + Divide packed double-precision floating-point values in xmm2 by packed double-precision floating-point values in xmm3/mem. + + + VDIVPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5E /r + + AVX + + Divide packed double-precision floating-point values in ymm2 by packed double-precision floating-point values in ymm3/mem. + + + VDIVPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 5E /r + + AVX512VL + AVX512F + + Divide packed double-precision floating-point values in xmm2 by packed double-precision floating-point values in xmm3/m128/m64bcst and write results to xmm1 subject to writemask k1. + + + VDIVPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 5E /r + + AVX512VL + AVX512F + + Divide packed double-precision floating-point values in ymm2 by packed double-precision floating-point values in ymm3/m256/m64bcst and write results to ymm1 subject to writemask k1. + + + VDIVPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F.W1 5E /r + + AVX512F + + Divide packed double-precision floating-point values in zmm2 by packed double-precision FP values in zmm3/m512/m64bcst and write results to zmm1 subject to writemask k1. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + DIVPS--Divide Packed Single-Precision Floating-Point Values. + + DIVPS + xmm1,xmm2/m128 + 0F 5E /r + + SSE + + Divide packed single-precision floating-point values in xmm1 by packed single-precision floating-point values in xmm2/mem. + + + VDIVPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5E /r + + AVX + + Divide packed single-precision floating-point values in xmm2 by packed single-precision floating-point values in xmm3/mem. + + + VDIVPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5E /r + + AVX + + Divide packed single-precision floating-point values in ymm2 by packed single-precision floating-point values in ymm3/mem. + + + VDIVPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 5E /r + + AVX512VL + AVX512F + + Divide packed single-precision floating-point values in xmm2 by packed single-precision floating-point values in xmm3/m128/m32bcst and write results to xmm1 subject to writemask k1. + + + VDIVPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 5E /r + + AVX512VL + AVX512F + + Divide packed single-precision floating-point values in ymm2 by packed single-precision floating-point values in ymm3/m256/m32bcst and write results to ymm1 subject to writemask k1. + + + VDIVPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.0F.W0 5E /r + + AVX512F + + Divide packed single-precision floating-point values in zmm2 by packed single-precision floating-point values in zmm3/m512/m32bcst and write results to zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + DIVSD--Divide Scalar Double-Precision Floating-Point Value. + + DIVSD + xmm1,xmm2/m64 + F2 0F 5E /r + + SSE2 + + Divide low double-precision floating-point value in xmm1 by low double-precision floating-point value in xmm2/m64. + + + VDIVSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 5E /r + + AVX + + Divide low double-precision floating-point value in xmm2 by low double-precision floating-point value in xmm3/m64. + + + VDIVSD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.F2.0F.W1 5E /r + + AVX512F + + Divide low double-precision floating-point value in xmm2 by low double-precision floating-point value in xmm3/m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + DIVSS--Divide Scalar Single-Precision Floating-Point Values. + + DIVSS + xmm1,xmm2/m32 + F3 0F 5E /r + + SSE + + Divide low single-precision floating-point value in xmm1 by low single-precision floating-point value in xmm2/m32. + + + VDIVSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 5E /r + + AVX + + Divide low single-precision floating-point value in xmm2 by low single-precision floating-point value in xmm3/m32. + + + VDIVSS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.NDS.LIG.F3.0F.W0 5E /r + + AVX512F + + Divide low single-precision floating-point value in xmm2 by low single-precision floating-point value in xmm3/m32. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VCOMPRESSPD--Store Sparse Packed Double-Precision Floating-Point Values into Dense Memory. 
+ + VCOMPRESSPD + xmm1/m128 {k1}{z},xmm2 + EVEX.128.66.0F38.W1 8A /r + + AVX512VL + AVX512F + + Compress packed double-precision floating-point values from xmm2 to xmm1/m128 using writemask k1. + + + VCOMPRESSPD + ymm1/m256 {k1}{z},ymm2 + EVEX.256.66.0F38.W1 8A /r + + AVX512VL + AVX512F + + Compress packed double-precision floating-point values from ymm2 to ymm1/m256 using writemask k1. + + + VCOMPRESSPD + zmm1/m512 {k1}{z},zmm2 + EVEX.512.66.0F38.W1 8A /r + + AVX512F + + Compress packed double-precision floating-point values from zmm2 using control mask k1 to zmm1/m512. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VCOMPRESSPS--Store Sparse Packed Single-Precision Floating-Point Values into Dense Memory. + + VCOMPRESSPS + xmm1/m128 {k1}{z},xmm2 + EVEX.128.66.0F38.W0 8A /r + + AVX512VL + AVX512F + + Compress packed single-precision floating-point values from xmm2 to xmm1/m128 using writemask k1. + + + VCOMPRESSPS + ymm1/m256 {k1}{z},ymm2 + EVEX.256.66.0F38.W0 8A /r + + AVX512VL + AVX512F + + Compress packed single-precision floating-point values from ymm2 to ymm1/m256 using writemask k1. + + + VCOMPRESSPS + zmm1/m512 {k1}{z},zmm2 + EVEX.512.66.0F38.W0 8A /r + + AVX512F + + Compress packed single-precision floating-point values from zmm2 using control mask k1 to zmm1/m512. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + CVTDQ2PD--Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values. + + CVTDQ2PD + xmm1,xmm2/m64 + F3 0F E6 /r + + SSE2 + + Convert two packed signed doubleword integers from xmm2/mem to two packed double-precision floatingpoint values in xmm1. + + + VCVTDQ2PD + xmm1,xmm2/m64 + VEX.128.F3.0F.WIG E6 /r + + AVX + + Convert two packed signed doubleword integers from xmm2/mem to two packed double-precision floatingpoint values in xmm1. + + + VCVTDQ2PD + ymm1,xmm2/m128 + VEX.256.F3.0F.WIG E6 /r + + AVX + + Convert four packed signed doubleword integers from xmm2/mem to four packed double-precision floatingpoint values in ymm1. + + + VCVTDQ2PD + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.F3.0F.W0 E6 /r + + AVX512VL + AVX512F + + Convert 2 packed signed doubleword integers from xmm2/m128/m32bcst to eight packed double-precision floating-point values in xmm1 with writemask k1. + + + VCVTDQ2PD + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.F3.0F.W0 E6 /r + + AVX512VL + AVX512F + + Convert 4 packed signed doubleword integers from xmm2/m128/m32bcst to 4 packed double-precision floating-point values in ymm1 with writemask k1. + + + VCVTDQ2PD + zmm1 {k1}{z},ymm2/m256/m32bcst + EVEX.512.F3.0F.W0 E6 /r + + AVX512F + + Convert eight packed signed doubleword integers from ymm2/m256/m32bcst to eight packed double-precision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTDQ2PS--Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values. + + CVTDQ2PS + xmm1,xmm2/m128 + 0F 5B /r + + SSE2 + + Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision floatingpoint values in xmm1. + + + VCVTDQ2PS + xmm1,xmm2/m128 + VEX.128.0F.WIG 5B /r + + AVX + + Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision floatingpoint values in xmm1. + + + VCVTDQ2PS + ymm1,ymm2/m256 + VEX.256.0F.WIG 5B /r + + AVX + + Convert eight packed signed doubleword integers from ymm2/mem to eight packed single-precision floatingpoint values in ymm1. 
+ + + VCVTDQ2PS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert four packed signed doubleword integers from xmm2/m128/m32bcst to four packed single-precision floating-point values in xmm1with writemask k1. + + + VCVTDQ2PS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert eight packed signed doubleword integers from ymm2/m256/m32bcst to eight packed single-precision floating-point values in ymm1with writemask k1. + + + VCVTDQ2PS + zmm1 {k1}{z},zmm2/m512/m32bcst{er} + EVEX.512.0F.W0 5B /r + + AVX512F + + Convert sixteen packed signed doubleword integers from zmm2/m512/m32bcst to sixteen packed singleprecision floating-point values in zmm1with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPD2DQ--Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers. + + CVTPD2DQ + xmm1,xmm2/m128 + F2 0F E6 /r + + SSE2 + + Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1. + + + VCVTPD2DQ + xmm1,xmm2/m128 + VEX.128.F2.0F.WIG E6 /r + + AVX + + Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1. + + + VCVTPD2DQ + xmm1,ymm2/m256 + VEX.256.F2.0F.WIG E6 /r + + AVX + + Convert four packed double-precision floating-point values in ymm2/mem to four signed doubleword integers in xmm1. + + + VCVTPD2DQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.F2.0F.W1 E6 /r + + AVX512VL + AVX512F + + Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two signed doubleword integers in xmm1 subject to writemask k1. + + + VCVTPD2DQ + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.F2.0F.W1 E6 /r + + AVX512VL + AVX512F + + Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four signed doubleword integers in xmm1 subject to writemask k1. + + + VCVTPD2DQ + ymm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.F2.0F.W1 E6 /r + + AVX512F + + Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight signed doubleword integers in ymm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPD2PS--Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values. + + CVTPD2PS + xmm1,xmm2/m128 + 66 0F 5A /r + + SSE2 + + Convert two packed double-precision floating-point values in xmm2/mem to two single-precision floating-point values in xmm1. + + + VCVTPD2PS + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 5A /r + + AVX + + Convert two packed double-precision floating-point values in xmm2/mem to two single-precision floating-point values in xmm1. + + + VCVTPD2PS + xmm1,ymm2/m256 + VEX.256.66.0F.WIG 5A /r + + AVX + + Convert four packed double-precision floating-point values in ymm2/mem to four single-precision floating-point values in xmm1. + + + VCVTPD2PS + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 5A /r + + AVX512VL + AVX512F + + Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two singleprecision floating-point values in xmm1with writemask k1. + + + VCVTPD2PS + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 5A /r + + AVX512VL + AVX512F + + Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four singleprecision floating-point values in xmm1with writemask k1. 
+ + + VCVTPD2PS + ymm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.66.0F.W1 5A /r + + AVX512F + + Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight singleprecision floating-point values in ymm1with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPD2QQ--Convert Packed Double-Precision Floating-Point Values to Packed Quadword Integers. + + VCVTPD2QQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 7B /r + + AVX512VL + AVX512DQ + + Convert two packed double-precision floating-point values from xmm2/m128/m64bcst to two packed quadword integers in xmm1 with writemask k1. + + + VCVTPD2QQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 7B /r + + AVX512VL + AVX512DQ + + Convert four packed double-precision floating-point values from ymm2/m256/m64bcst to four packed quadword integers in ymm1 with writemask k1. + + + VCVTPD2QQ + zmm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.66.0F.W1 7B /r + + AVX512DQ + + Convert eight packed double-precision floating-point values from zmm2/m512/m64bcst to eight packed quadword integers in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPD2UDQ--Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers. + + VCVTPD2UDQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.0F.W1 79 /r + + AVX512VL + AVX512F + + Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two unsigned doubleword integers in xmm1 subject to writemask k1. + + + VCVTPD2UDQ + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.0F.W1 79 /r + + AVX512VL + AVX512F + + Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four unsigned doubleword integers in xmm1 subject to writemask k1. + + + VCVTPD2UDQ + ymm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.0F.W1 79 /r + + AVX512F + + Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight unsigned doubleword integers in ymm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPD2UQQ--Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers. + + VCVTPD2UQQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 79 /r + + AVX512VL + AVX512DQ + + Convert two packed double-precision floating-point values from xmm2/mem to two packed unsigned quadword integers in xmm1 with writemask k1. + + + VCVTPD2UQQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 79 /r + + AVX512VL + AVX512DQ + + Convert fourth packed double-precision floating-point values from ymm2/mem to four packed unsigned quadword integers in ymm1 with writemask k1. + + + VCVTPD2UQQ + zmm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.66.0F.W1 79 /r + + AVX512DQ + + Convert eight packed double-precision floating-point values from zmm2/mem to eight packed unsigned quadword integers in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPH2PS--Convert 16-bit FP values to Single-Precision FP values. + + VCVTPH2PS + xmm1,xmm2/m64 + VEX.128.66.0F38.W0 1313 /r + + F16C + + Convert four packed half precision (16-bit) floatingpoint values in xmm2/m64 to packed single-precision floating-point value in xmm1. + + + VCVTPH2PS + ymm1,xmm2/m128 + VEX.256.66.0F38.W0 1313 /r + + F16C + + Convert eight packed half precision (16-bit) floatingpoint values in xmm2/m128 to packed singleprecision floating-point value in ymm1. 
+ + + VCVTPH2PS + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.W0 1313 /r + + AVX512VL + AVX512F + + Convert four packed half precision (16-bit) floatingpoint values in xmm2/m64 to packed single-precision floating-point values in xmm1. + + + VCVTPH2PS + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.W0 1313 /r + + AVX512VL + AVX512F + + Convert eight packed half precision (16-bit) floatingpoint values in xmm2/m128 to packed singleprecision floating-point values in ymm1. + + + VCVTPH2PS + zmm1 {k1}{z},ymm2/m256 {sae} + EVEX.512.66.0F38.W0 1313 /r + + AVX512F + + Convert sixteen packed half precision (16-bit) floating-point values in ymm2/m256 to packed single-precision floating-point values in zmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPS2PH--Convert Single-Precision FP value to 16-bit FP value. + + VCVTPS2PH + xmm1/m64,xmm2,imm8 + VEX.128.66.0F3A.W0 1D 1D/r ib + + F16C + + Convert four packed single-precision floating-point values in xmm2 to packed half-precision (16-bit) floating-point values in xmm1/m64. Imm8 provides rounding controls. + + + VCVTPS2PH + xmm1/m128,ymm2,imm8 + VEX.256.66.0F3A.W0 1D1D /r ib + + F16C + + Convert eight packed single-precision floating-point values in ymm2 to packed half-precision (16-bit) floating-point values in xmm1/m128. Imm8 provides rounding controls. + + + VCVTPS2PH + xmm1/m64 {k1}{z},xmm2,imm8 + EVEX.128.66.0F3A.W0 1D1D /r ib + + AVX512VL + AVX512F + + Convert four packed single-precision floating-point values in xmm2 to packed half-precision (16-bit) floating-point values in xmm1/m64. Imm8 provides rounding controls. + + + VCVTPS2PH + xmm1/m128 {k1}{z},ymm2,imm8 + EVEX.256.66.0F3A.W0 1D1D /r ib + + AVX512VL + AVX512F + + Convert eight packed single-precision floating-point values in ymm2 to packed half-precision (16-bit) floating-point values in xmm1/m128. Imm8 provides rounding controls. + + + VCVTPS2PH + ymm1/m256 {k1}{z},zmm2{sae},imm8 + EVEX.512.66.0F3A.W0 1D1D /r ib + + AVX512F + + Convert sixteen packed single-precision floating-point values in zmm2 to packed half-precision (16-bit) floatingpoint values in ymm1/m256. Imm8 provides rounding controls. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + CVTPS2DQ--Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values. + + CVTPS2DQ + xmm1,xmm2/m128 + 66 0F 5B /r + + SSE2 + + Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1. + + + VCVTPS2DQ + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 5B /r + + AVX + + Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1. + + + VCVTPS2DQ + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 5B /r + + AVX + + Convert eight packed single-precision floating-point values from ymm2/mem to eight packed signed doubleword values in ymm1. + + + VCVTPS2DQ + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed doubleword values in xmm1 subject to writemask k1. + + + VCVTPS2DQ + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed doubleword values in ymm1 subject to writemask k1. 
+ + + VCVTPS2DQ + zmm1 {k1}{z},zmm2/m512/m32bcst{er} + EVEX.512.66.0F.W0 5B /r + + AVX512F + + Convert sixteen packed single-precision floating-point values from zmm2/m512/m32bcst to sixteen packed signed doubleword values in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPS2UDQ--Convert Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values. + + VCVTPS2UDQ + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.0F.W0 79 /r + + AVX512VL + AVX512F + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned doubleword values in xmm1 subject to writemask k1. + + + VCVTPS2UDQ + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.0F.W0 79 /r + + AVX512VL + AVX512F + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned doubleword values in ymm1 subject to writemask k1. + + + VCVTPS2UDQ + zmm1 {k1}{z},zmm2/m512/m32bcst{er} + EVEX.512.0F.W0 79 /r + + AVX512F + + Convert sixteen packed single-precision floating-point values from zmm2/m512/m32bcst to sixteen packed unsigned doubleword values in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPS2QQ--Convert Packed Single Precision Floating-Point Values to Packed Singed Quadword Integer Values. + + VCVTPS2QQ + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.66.0F.W0 7B /r + + AVX512VL + AVX512DQ + + Convert two packed single precision floating-point values from xmm2/m64/m32bcst to two packed signed quadword values in xmm1 subject to writemask k1. + + + VCVTPS2QQ + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.66.0F.W0 7B /r + + AVX512VL + AVX512DQ + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed quadword values in ymm1 subject to writemask k1. + + + VCVTPS2QQ + zmm1 {k1}{z},ymm2/m256/m32bcst{er} + EVEX.512.66.0F.W0 7B /r + + AVX512DQ + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed quadword values in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPS2UQQ--Convert Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values. + + VCVTPS2UQQ + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.66.0F.W0 79 /r + + AVX512VL + AVX512DQ + + Convert two packed single precision floating-point values from zmm2/m64/m32bcst to two packed unsigned quadword values in zmm1 subject to writemask k1. + + + VCVTPS2UQQ + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.66.0F.W0 79 /r + + AVX512VL + AVX512DQ + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned quadword values in ymm1 subject to writemask k1. + + + VCVTPS2UQQ + zmm1 {k1}{z},ymm2/m256/m32bcst{er} + EVEX.512.66.0F.W0 79 /r + + AVX512DQ + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned quadword values in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPS2PD--Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values. + + CVTPS2PD + xmm1,xmm2/m64 + 0F 5A /r + + SSE2 + + Convert two packed single-precision floating-point values in xmm2/m64 to two packed double-precision floating-point values in xmm1. 
+ + + VCVTPS2PD + xmm1,xmm2/m64 + VEX.128.0F.WIG 5A /r + + AVX + + Convert two packed single-precision floating-point values in xmm2/m64 to two packed double-precision floating-point values in xmm1. + + + VCVTPS2PD + ymm1,xmm2/m128 + VEX.256.0F.WIG 5A /r + + AVX + + Convert four packed single-precision floating-point values in xmm2/m128 to four packed double-precision floatingpoint values in ymm1. + + + VCVTPS2PD + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.0F.W0 5A /r + + AVX512VL + AVX512F + + Convert two packed single-precision floating-point values in xmm2/m64/m32bcst to packed double-precision floatingpoint values in xmm1 with writemask k1. + + + VCVTPS2PD + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.0F.W0 5A /r + + AVX512VL + + Convert four packed single-precision floating-point values in xmm2/m128/m32bcst to packed double-precision floating-point values in ymm1 with writemask k1. + + + VCVTPS2PD + zmm1 {k1}{z},ymm2/m256/m32bcst{sae} + EVEX.512.0F.W0 5A /r + + AVX512F + + Convert eight packed single-precision floating-point values in ymm2/m256/b32bcst to eight packed double-precision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTQQ2PD--Convert Packed Quadword Integers to Packed Double-Precision Floating-Point Values. + + VCVTQQ2PD + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.F3.0F.W1 E6 /r + + AVX512VL + AVX512DQ + + Convert two packed quadword integers from xmm2/m128/m64bcst to packed double-precision floatingpoint values in xmm1 with writemask k1. + + + VCVTQQ2PD + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.F3.0F.W1 E6 /r + + AVX512VL + AVX512DQ + + Convert four packed quadword integers from ymm2/m256/m64bcst to packed double-precision floatingpoint values in ymm1 with writemask k1. + + + VCVTQQ2PD + zmm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.F3.0F.W1 E6 /r + + AVX512DQ + + Convert eight packed quadword integers from zmm2/m512/m64bcst to eight packed double-precision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTQQ2PS--Convert Packed Quadword Integers to Packed Single-Precision Floating-Point Values. + + VCVTQQ2PS + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.0F.W1 5B /r + + AVX512VL + AVX512DQ + + Convert two packed quadword integers from xmm2/mem to packed single-precision floating-point values in xmm1 with writemask k1. + + + VCVTQQ2PS + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.0F.W1 5B /r + + AVX512VL + AVX512DQ + + Convert four packed quadword integers from ymm2/mem to packed single-precision floating-point values in xmm1 with writemask k1. + + + VCVTQQ2PS + ymm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.0F.W1 5B /r + + AVX512DQ + + Convert eight packed quadword integers from zmm2/mem to eight packed single-precision floating-point values in ymm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTSD2SI--Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer. + + CVTSD2SI + r32,xmm1/m64 + F2 0F 2D /r + + SSE2 + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer r32. + + + CVTSD2SI + r64,xmm1/m64 + F2 REX.W 0F 2D /r + + SSE2 + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer signextended into r64. + + + VCVTSD2SI + r32,xmm1/m64 + VEX.128.F2.0F.W0 2D /r + + AVX + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer r32. 
+ + + VCVTSD2SI + r64,xmm1/m64 + VEX.128.F2.0F.W1 2D /r + + AVX + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer signextended into r64. + + + VCVTSD2SI + r32,xmm1/m64{er} + EVEX.LIG.F2.0F.W0 2D /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer r32. + + + VCVTSD2SI + r64,xmm1/m64{er} + EVEX.LIG.F2.0F.W1 2D /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer signextended into r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTSD2USI--Convert Scalar Double-Precision Floating-Point Value to Unsigned Doubleword Integer. + + VCVTSD2USI + r32,xmm1/m64{er} + EVEX.LIG.F2.0F.W0 79 /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one unsigned doubleword integer r32. + + + VCVTSD2USI + r64,xmm1/m64{er} + EVEX.LIG.F2.0F.W1 79 /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one unsigned quadword integer zeroextended into r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTSD2SS--Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value. + + CVTSD2SS + xmm1,xmm2/m64 + F2 0F 5A /r + + SSE2 + + Convert one double-precision floating-point value in xmm2/m64 to one single-precision floating-point value in xmm1. + + + VCVTSD2SS + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 5A /r + + AVX + + Convert one double-precision floating-point value in xmm3/m64 to one single-precision floating-point value and merge with high bits in xmm2. + + + VCVTSD2SS + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.F2.0F.W1 5A /r + + AVX512F + + Convert one double-precision floating-point value in xmm3/m64 to one single-precision floating-point value and merge with high bits in xmm2 under writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + CVTSI2SD--Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value. + + CVTSI2SD + xmm1,r32/m32 + F2 0F 2A /r + + SSE2 + + Convert one signed doubleword integer from r32/m32 to one double-precision floating-point value in xmm1. + + + CVTSI2SD + xmm1,r/m64 + F2 REX.W 0F 2A /r + + SSE2 + + Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm1. + + + VCVTSI2SD + xmm1,xmm2,r/m32 + VEX.NDS.128.F2.0F.W0 2A /r + + AVX + + Convert one signed doubleword integer from r/m32 to one double-precision floating-point value in xmm1. + + + VCVTSI2SD + xmm1,xmm2,r/m64 + VEX.NDS.128.F2.0F.W1 2A /r + + AVX + + Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm1. + + + VCVTSI2SD + xmm1,xmm2,r/m32 + EVEX.NDS.LIG.F2.0F.W0 2A /r + + AVX512F + + Convert one signed doubleword integer from r/m32 to one double-precision floating-point value in xmm1. + + + VCVTSI2SD + xmm1,xmm2,r/m64{er} + EVEX.NDS.LIG.F2.0F.W1 2A /r + + AVX512F + + Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + CVTSI2SS--Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value. 
+ + CVTSI2SS + xmm1,r/m32 + F3 0F 2A /r + + SSE + + Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1. + + + CVTSI2SS + xmm1,r/m64 + F3 REX.W 0F 2A /r + + SSE + + Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1. + + + VCVTSI2SS + xmm1,xmm2,r/m32 + VEX.NDS.128.F3.0F.W0 2A /r + + AVX + + Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1. + + + VCVTSI2SS + xmm1,xmm2,r/m64 + VEX.NDS.128.F3.0F.W1 2A /r + + AVX + + Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1. + + + VCVTSI2SS + xmm1,xmm2,r/m32{er} + EVEX.NDS.LIG.F3.0F.W0 2A /r + + AVX512F + + Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1. + + + VCVTSI2SS + xmm1,xmm2,r/m64{er} + EVEX.NDS.LIG.F3.0F.W1 2A /r + + AVX512F + + Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + CVTSS2SD--Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value. + + CVTSS2SD + xmm1,xmm2/m32 + F3 0F 5A /r + + SSE2 + + Convert one single-precision floating-point value in xmm2/m32 to one double-precision floating-point value in xmm1. + + + VCVTSS2SD + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 5A /r + + AVX + + Convert one single-precision floating-point value in xmm3/m32 to one double-precision floating-point value and merge with high bits of xmm2. + + + VCVTSS2SD + xmm1 {k1}{z},xmm2,xmm3/m32{sae} + EVEX.NDS.LIG.F3.0F.W0 5A /r + + AVX512F + + Convert one single-precision floating-point value in xmm3/m32 to one double-precision floating-point value and merge with high bits of xmm2 under writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + CVTSS2SI--Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer. + + CVTSS2SI + r32,xmm1/m32 + F3 0F 2D /r + + SSE + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32. + + + CVTSS2SI + r64,xmm1/m32 + F3 REX.W 0F 2D /r + + SSE + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64. + + + VCVTSS2SI + r32,xmm1/m32 + VEX.128.F3.0F.W0 2D /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32. + + + VCVTSS2SI + r64,xmm1/m32 + VEX.128.F3.0F.W1 2D /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64. + + + VCVTSS2SI + r32,xmm1/m32{er} + EVEX.LIG.F3.0F.W0 2D /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32. + + + VCVTSS2SI + r64,xmm1/m32{er} + EVEX.LIG.F3.0F.W1 2D /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTSS2USI--Convert Scalar Single-Precision Floating-Point Value to Unsigned Doubleword Integer. 
+ + VCVTSS2USI + r32,xmm1/m32{er} + EVEX.LIG.F3.0F.W0 79 /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one unsigned doubleword integer in r32. + + + VCVTSS2USI + r64,xmm1/m32{er} + EVEX.LIG.F3.0F.W1 79 /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one unsigned quadword integer in r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTPD2DQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers. + + CVTTPD2DQ + xmm1,xmm2/m128 + 66 0F E6 /r + + SSE2 + + Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1 using truncation. + + + VCVTTPD2DQ + xmm1,xmm2/m128 + VEX.128.66.0F.WIG E6 /r + + AVX + + Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1 using truncation. + + + VCVTTPD2DQ + xmm1,ymm2/m256 + VEX.256.66.0F.WIG E6 /r + + AVX + + Convert four packed double-precision floating-point values in ymm2/mem to four signed doubleword integers in xmm1 using truncation. + + + VCVTTPD2DQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 E6 /r + + AVX512VL + AVX512F + + Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two signed doubleword integers in xmm1 using truncation subject to writemask k1. + + + VCVTTPD2DQ + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 E6 /r + + AVX512VL + AVX512F + + Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four signed doubleword integers in xmm1 using truncation subject to writemask k1. + + + VCVTTPD2DQ + ymm1 {k1}{z},zmm2/m512/m64bcst{sae} + EVEX.512.66.0F.W1 E6 /r + + AVX512F + + Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight signed doubleword integers in ymm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPD2QQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Quadword Integers. + + VCVTTPD2QQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert two packed double-precision floating-point values from zmm2/m128/m64bcst to two packed quadword integers in zmm1 using truncation with writemask k1. + + + VCVTTPD2QQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert four packed double-precision floating-point values from ymm2/m256/m64bcst to four packed quadword integers in ymm1 using truncation with writemask k1. + + + VCVTTPD2QQ + zmm1 {k1}{z},zmm2/m512/m64bcst{sae} + EVEX.512.66.0F.W1 7A /r + + AVX512DQ + + Convert eight packed double-precision floating-point values from zmm2/m512 to eight packed quadword integers in zmm1 using truncation with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPD2UDQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers. + + VCVTTPD2UDQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.0F.W1 78 /r + + AVX512VL + AVX512F + + Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two unsigned doubleword integers in xmm1 using truncation subject to writemask k1. 
+ + + VCVTTPD2UDQ + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.0F.W1 78 02 /r + + AVX512VL + AVX512F + + Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four unsigned doubleword integers in xmm1 using truncation subject to writemask k1. + + + VCVTTPD2UDQ + ymm1 {k1}{z},zmm2/m512/m64bcst{sae} + EVEX.512.0F.W1 78 /r + + AVX512F + + Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight unsigned doubleword integers in ymm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPD2UQQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers. + + VCVTTPD2UQQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F.W1 78 /r + + AVX512VL + AVX512DQ + + Convert two packed double-precision floating-point values from xmm2/m128/m64bcst to two packed unsigned quadword integers in xmm1 using truncation with writemask k1. + + + VCVTTPD2UQQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F.W1 78 /r + + AVX512VL + AVX512DQ + + Convert four packed double-precision floating-point values from ymm2/m256/m64bcst to four packed unsigned quadword integers in ymm1 using truncation with writemask k1. + + + VCVTTPD2UQQ + zmm1 {k1}{z},zmm2/m512/m64bcst{sae} + EVEX.512.66.0F.W1 78 /r + + AVX512DQ + + Convert eight packed double-precision floating-point values from zmm2/mem to eight packed unsigned quadword integers in zmm1 using truncation with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTPS2DQ--Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values. + + CVTTPS2DQ + xmm1,xmm2/m128 + F3 0F 5B /r + + SSE2 + + Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1 using truncation. + + + VCVTTPS2DQ + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 5B /r + + AVX + + Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1 using truncation. + + + VCVTTPS2DQ + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 5B /r + + AVX + + Convert eight packed single-precision floating-point values from ymm2/mem to eight packed signed doubleword values in ymm1 using truncation. + + + VCVTTPS2DQ + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.F3.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed doubleword values in xmm1 using truncation subject to writemask k1. + + + VCVTTPS2DQ + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.F3.0F.W0 5B /r + + AVX512VL + AVX512F + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed doubleword values in ymm1 using truncation subject to writemask k1. + + + VCVTTPS2DQ + zmm1 {k1}{z},zmm2/m512/m32bcst {sae} + EVEX.512.F3.0F.W0 5B /r + + AVX512F + + Convert sixteen packed single-precision floating-point values from zmm2/m512/m32bcst to sixteen packed signed doubleword values in zmm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPS2UDQ--Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values. 
+ + VCVTTPS2UDQ + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.0F.W0 78 /r + + AVX512VL + AVX512F + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned doubleword values in xmm1 using truncation subject to writemask k1. + + + VCVTTPS2UDQ + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.0F.W0 78 /r + + AVX512VL + AVX512F + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned doubleword values in ymm1 using truncation subject to writemask k1. + + + VCVTTPS2UDQ + zmm1 {k1}{z},zmm2/m512/m32bcst{sae} + EVEX.512.0F.W0 78 /r + + AVX512F + + Convert sixteen packed single-precision floatingpoint values from zmm2/m512/m32bcst to sixteen packed unsigned doubleword values in zmm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPS2QQ--Convert with Truncation Packed Single Precision Floating-Point Values to Packed Singed Quadword Integer Values. + + VCVTTPS2QQ + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.66.0F.W0 7A /r + + AVX512VL + AVX512DQ + + Convert two packed single precision floating-point values from xmm2/m64/m32bcst to two packed signed quadword values in xmm1 using truncation subject to writemask k1. + + + VCVTTPS2QQ + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.66.0F.W0 7A /r + + AVX512VL + AVX512DQ + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed quadword values in ymm1 using truncation subject to writemask k1. + + + VCVTTPS2QQ + zmm1 {k1}{z},ymm2/m256/m32bcst{sae} + EVEX.512.66.0F.W0 7A /r + + AVX512DQ + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed quadword values in zmm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTPS2UQQ--Convert with Truncation Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values. + + VCVTTPS2UQQ + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.66.0F.W0 78 /r + + AVX512VL + AVX512DQ + + Convert two packed single precision floating-point values from zmm2/m64/m32bcst to two packed unsigned quadword values in zmm1 using truncation subject to writemask k1. + + + VCVTTPS2UQQ + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.66.0F.W0 78 /r + + AVX512VL + AVX512DQ + + Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned quadword values in ymm1 using truncation subject to writemask k1. + + + VCVTTPS2UQQ + zmm1 {k1}{z},ymm2/m256/m32bcst{sae} + EVEX.512.66.0F.W0 78 /r + + AVX512DQ + + Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned quadword values in zmm1 using truncation subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTSD2SI--Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer. + + CVTTSD2SI + r32,xmm1/m64 + F2 0F 2C /r + + SSE2 + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer in r32 using truncation. + + + CVTTSD2SI + r64,xmm1/m64 + F2 REX.W 0F 2C /r + + SSE2 + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer in r64 using truncation. + + + VCVTTSD2SI + r32,xmm1/m64 + VEX.128.F2.0F.W0 2C /r + + AVX + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer in r32 using truncation. 
+ + + VCVTTSD2SI + r64,xmm1/m64 + VEX.128.F2.0F.W1 2C /r + + AVX + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer in r64 using truncation. + + + VCVTTSD2SI + r32,xmm1/m64{sae} + EVEX.LIG.F2.0F.W0 2C /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer in r32 using truncation. + + + VCVTTSD2SI + r64,xmm1/m64{sae} + EVEX.LIG.F2.0F.W1 2C /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer in r64 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTSD2USI--Convert with Truncation Scalar Double-Precision Floating-Point Value to Unsigned Integer. + + VCVTTSD2USI + r32,xmm1/m64{sae} + EVEX.LIG.F2.0F.W0 78 /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one unsigned doubleword integer r32 using truncation. + + + VCVTTSD2USI + r64,xmm1/m64{sae} + EVEX.LIG.F2.0F.W1 78 /r + + AVX512F + + Convert one double-precision floating-point value from xmm1/m64 to one unsigned quadword integer zeroextended into r64 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTSS2SI--Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer. + + CVTTSS2SI + r32,xmm1/m32 + F3 0F 2C /r + + SSE + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32 using truncation. + + + CVTTSS2SI + r64,xmm1/m32 + F3 REX.W 0F 2C /r + + SSE + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64 using truncation. + + + VCVTTSS2SI + r32,xmm1/m32 + VEX.128.F3.0F.W0 2C /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32 using truncation. + + + VCVTTSS2SI + r64,xmm1/m32 + VEX.128.F3.0F.W1 2C /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64 using truncation. + + + VCVTTSS2SI + r32,xmm1/m32{sae} + EVEX.LIG.F3.0F.W0 2C /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32 using truncation. + + + VCVTTSS2SI + r64,xmm1/m32{sae} + EVEX.LIG.F3.0F.W1 2C /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTTSS2USI--Convert with Truncation Scalar Single-Precision Floating-Point Value to Unsigned Integer. + + VCVTTSS2USI + r32,xmm1/m32{sae} + EVEX.LIG.F3.0F.W0 78 /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one unsigned doubleword integer in r32 using truncation. + + + VCVTTSS2USI + r64,xmm1/m32{sae} + EVEX.LIG.F3.0F.W1 78 /r + + AVX512F + + Convert one single-precision floating-point value from xmm1/m32 to one unsigned quadword integer in r64 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTUDQ2PD--Convert Packed Unsigned Doubleword Integers to Packed Double-Precision Floating-Point Values. + + VCVTUDQ2PD + xmm1 {k1}{z},xmm2/m64/m32bcst + EVEX.128.F3.0F.W0 7A /r + + AVX512VL + AVX512F + + Convert two packed unsigned doubleword integers from ymm2/m64/m32bcst to packed double-precision floating-point values in zmm1 with writemask k1. 
+ + + VCVTUDQ2PD + ymm1 {k1}{z},xmm2/m128/m32bcst + EVEX.256.F3.0F.W0 7A /r + + AVX512VL + AVX512F + + Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed doubleprecision floating-point values in zmm1 with writemask k1. + + + VCVTUDQ2PD + zmm1 {k1}{z},ymm2/m256/m32bcst + EVEX.512.F3.0F.W0 7A /r + + AVX512F + + Convert eight packed unsigned doubleword integers from ymm2/m256/m32bcst to eight packed doubleprecision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTUDQ2PS--Convert Packed Unsigned Doubleword Integers to Packed Single-Precision Floating-Point Values. + + VCVTUDQ2PS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.F2.0F.W0 7A /r + + AVX512VL + AVX512F + + Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed single-precision floating-point values in xmm1 with writemask k1. + + + VCVTUDQ2PS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.F2.0F.W0 7A /r + + AVX512VL + AVX512F + + Convert eight packed unsigned doubleword integers from ymm2/m256/m32bcst to packed single-precision floating-point values in zmm1 with writemask k1. + + + VCVTUDQ2PS + zmm1 {k1}{z},zmm2/m512/m32bcst{er} + EVEX.512.F2.0F.W0 7A /r + + AVX512F + + Convert sixteen packed unsigned doubleword integers from zmm2/m512/m32bcst to sixteen packed singleprecision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTUQQ2PD--Convert Packed Unsigned Quadword Integers to Packed Double-Precision Floating-Point Values. + + VCVTUQQ2PD + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.F3.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert two packed unsigned quadword integers from xmm2/m128/m64bcst to two packed double-precision floating-point values in xmm1 with writemask k1. + + + VCVTUQQ2PD + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.F3.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert four packed unsigned quadword integers from ymm2/m256/m64bcst to packed double-precision floatingpoint values in ymm1 with writemask k1. + + + VCVTUQQ2PD + zmm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.F3.0F.W1 7A /r + + AVX512DQ + + Convert eight packed unsigned quadword integers from zmm2/m512/m64bcst to eight packed double-precision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTUQQ2PS--Convert Packed Unsigned Quadword Integers to Packed Single-Precision Floating-Point Values. + + VCVTUQQ2PS + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.F2.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert two packed unsigned quadword integers from xmm2/m128/m64bcst to packed single-precision floatingpoint values in zmm1 with writemask k1. + + + VCVTUQQ2PS + xmm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.F2.0F.W1 7A /r + + AVX512VL + AVX512DQ + + Convert four packed unsigned quadword integers from ymm2/m256/m64bcst to packed single-precision floatingpoint values in xmm1 with writemask k1. + + + VCVTUQQ2PS + ymm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.F2.0F.W1 7A /r + + AVX512DQ + + Convert eight packed unsigned quadword integers from zmm2/m512/m64bcst to eight packed single-precision floating-point values in zmm1 with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTUSI2SD--Convert Unsigned Integer to Scalar Double-Precision Floating-Point Value. + + VCVTUSI2SD + xmm1,xmm2,r/m32 + EVEX.NDS.LIG.F2.0F.W0 7B /r + + AVX512F + + Convert one unsigned doubleword integer from r/m32 to one double-precision floating-point value in xmm1. 
+ + + VCVTUSI2SD + xmm1,xmm2,r/m64{er} + EVEX.NDS.LIG.F2.0F.W1 7B /r + + AVX512F + + Convert one unsigned quadword integer from r/m64 to one double-precision floating-point value in xmm1. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VCVTUSI2SS--Convert Unsigned Integer to Scalar Single-Precision Floating-Point Value. + + VCVTUSI2SS + xmm1,xmm2,r/m32{er} + EVEX.NDS.LIG.F3.0F.W0 7B /r + + AVX512F + + Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1. + + + VCVTUSI2SS + xmm1,xmm2,r/m64{er} + EVEX.NDS.LIG.F3.0F.W1 7B /r + + AVX512F + + Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VDBPSADBW--Double Block Packed Sum-Absolute-Differences (SAD) on Unsigned Bytes. + + VDBPSADBW + xmm1 {k1}{z},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.W0 42 /r ib + + AVX512VL + AVX512BW + + Compute packed SAD word results of unsigned bytes in dword block from xmm2 with unsigned bytes of dword blocks transformed from xmm3/m128 using the shuffle controls in imm8. Results are written to xmm1 under the writemask k1. + + + VDBPSADBW + ymm1 {k1}{z},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.W0 42 /r ib + + AVX512VL + AVX512BW + + Compute packed SAD word results of unsigned bytes in dword block from ymm2 with unsigned bytes of dword blocks transformed from ymm3/m256 using the shuffle controls in imm8. Results are written to ymm1 under the writemask k1. + + + VDBPSADBW + zmm1 {k1}{z},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.W0 42 /r ib + + AVX512BW + + Compute packed SAD word results of unsigned bytes in dword block from zmm2 with unsigned bytes of dword blocks transformed from zmm3/m512 using the shuffle controls in imm8. Results are written to zmm1 under the writemask k1. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VEXPANDPD--Load Sparse Packed Double-Precision Floating-Point Values from Dense Memory. + + VEXPANDPD + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F38.W1 88 /r + + AVX512VL + AVX512F + + Expand packed double-precision floating-point values from xmm2/m128 to xmm1 using writemask k1. + + + VEXPANDPD + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F38.W1 88 /r + + AVX512VL + AVX512F + + Expand packed double-precision floating-point values from ymm2/m256 to ymm1 using writemask k1. + + + VEXPANDPD + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F38.W1 88 /r + + AVX512F + + Expand packed double-precision floating-point values from zmm2/m512 to zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VEXPANDPS--Load Sparse Packed Single-Precision Floating-Point Values from Dense Memory. + + VEXPANDPS + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F38.W0 88 /r + + AVX512VL + AVX512F + + Expand packed single-precision floating-point values from xmm2/m128 to xmm1 using writemask k1. + + + VEXPANDPS + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F38.W0 88 /r + + AVX512VL + AVX512F + + Expand packed single-precision floating-point values from ymm2/m256 to ymm1 using writemask k1. + + + VEXPANDPS + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F38.W0 88 /r + + AVX512F + + Expand packed single-precision floating-point values from zmm2/m512 to zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VEXTRACTF128/VEXTRACTF32x4/VEXTRACTF64x2/VEXTRACTF32x8/VEXTRACTF64x4--Extr act Packed Floating-Point Values. 
+ + VEXTRACTF128 + xmm1/m128,ymm2,imm8 + VEX.256.66.0F3A.W0 19 /r ib + + AVX + + Extract 128 bits of packed floating-point values from ymm2 and store results in xmm1/m128. + + + VEXTRACTF32X4 + xmm1/m128 {k1}{z},ymm2,imm8 + EVEX.256.66.0F3A.W0 19 /r ib + + AVX512VL + AVX512F + + Extract 128 bits of packed single-precision floatingpoint values from ymm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTF32x4 + xmm1/m128 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W0 19 /r ib + + AVX512F + + Extract 128 bits of packed single-precision floatingpoint values from zmm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTF64X2 + xmm1/m128 {k1}{z},ymm2,imm8 + EVEX.256.66.0F3A.W1 19 /r ib + + AVX512VL + AVX512DQ + + Extract 128 bits of packed double-precision floating-point values from ymm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTF64X2 + xmm1/m128 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W1 19 /r ib + + AVX512DQ + + Extract 128 bits of packed double-precision floating-point values from zmm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTF32X8 + ymm1/m256 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W0 1B /r ib + + AVX512DQ + + Extract 256 bits of packed single-precision floatingpoint values from zmm2 and store results in ymm1/m256 subject to writemask k1. + + + VEXTRACTF64x4 + ymm1/m256 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W1 1B /r ib + + AVX512F + + Extract 256 bits of packed double-precision floating-point values from zmm2 and store results in ymm1/m256 subject to writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + VEXTRACTI128/VEXTRACTI32x4/VEXTRACTI64x2/VEXTRACTI32x8/VEXTRACTI64x4--Extract packed Integer Values. + + VEXTRACTI128 + xmm1/m128,ymm2,imm8 + VEX.256.66.0F3A.W0 39 /r ib + + AVX2 + + Extract 128 bits of integer data from ymm2 and store results in xmm1/m128. + + + VEXTRACTI32X4 + xmm1/m128 {k1}{z},ymm2,imm8 + EVEX.256.66.0F3A.W0 39 /r ib + + AVX512VL + AVX512F + + Extract 128 bits of double-word integer values from ymm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTI32x4 + xmm1/m128 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W0 39 /r ib + + AVX512F + + Extract 128 bits of double-word integer values from zmm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTI64X2 + xmm1/m128 {k1}{z},ymm2,imm8 + EVEX.256.66.0F3A.W1 39 /r ib + + AVX512VL + AVX512DQ + + Extract 128 bits of quad-word integer values from ymm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTI64X2 + xmm1/m128 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W1 39 /r ib + + AVX512DQ + + Extract 128 bits of quad-word integer values from zmm2 and store results in xmm1/m128 subject to writemask k1. + + + VEXTRACTI32X8 + ymm1/m256 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W0 3B /r ib + + AVX512DQ + + Extract 256 bits of double-word integer values from zmm2 and store results in ymm1/m256 subject to writemask k1. + + + VEXTRACTI64x4 + ymm1/m256 {k1}{z},zmm2,imm8 + EVEX.512.66.0F3A.W1 3B /r ib + + AVX512F + + Extract 256 bits of quad-word integer values from zmm2 and store results in ymm1/m256 subject to writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + EXTRACTPS--Extract Packed Floating-Point Values. 
+ + EXTRACTPS + reg/m32,xmm1,imm8 + 66 0F 3A 17 /r ib + + SSE4_1 + + Extract one single-precision floating-point value from xmm1 at the offset specified by imm8 and store the result in reg or m32. Zero extend the results in 64-bit register if applicable. + + + VEXTRACTPS + reg/m32,xmm1,imm8 + VEX.128.66.0F3A.WIG 17 /r ib + + AVX + + Extract one single-precision floating-point value from xmm1 at the offset specified by imm8 and store the result in reg or m32. Zero extend the results in 64-bit register if applicable. + + + VEXTRACTPS + reg/m32,xmm1,imm8 + EVEX.128.66.0F3A.WIG 17 /r ib + + AVX512F + + Extract one single-precision floating-point value from xmm1 at the offset specified by imm8 and store the result in reg or m32. Zero extend the results in 64-bit register if applicable. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + VFIXUPIMMPD--Fix Up Special Packed Float64 Values. + + VFIXUPIMMPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F3A.W1 54 /r ib + + AVX512VL + AVX512F + + Fix up special numbers in float64 vector xmm1, float64 vector xmm2 and int64 vector xmm3/m128/m64bcst and store the result in xmm1, under writemask. + + + VFIXUPIMMPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 54 /r ib + + AVX512VL + AVX512F + + Fix up special numbers in float64 vector ymm1, float64 vector ymm2 and int64 vector ymm3/m256/m64bcst and store the result in ymm1, under writemask. + + + VFIXUPIMMPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae},imm8 + EVEX.NDS.512.66.0F3A.W1 54 /r ib + + AVX512F + + Fix up elements of float64 vector in zmm2 using int64 vector table in zmm3/m512/m64bcst, combine with preserved elements from zmm1, and store the result in zmm1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VFIXUPIMMPS--Fix Up Special Packed Float32 Values. + + VFIXUPIMMPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.66.0F3A.W0 54 /r + + AVX512VL + AVX512F + + Fix up special numbers in float32 vector xmm1, float32 vector xmm2 and int32 vector xmm3/m128/m32bcst and store the result in xmm1, under writemask. + + + VFIXUPIMMPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 54 /r + + AVX512VL + AVX512F + + Fix up special numbers in float32 vector ymm1, float32 vector ymm2 and int32 vector ymm3/m256/m32bcst and store the result in ymm1, under writemask. + + + VFIXUPIMMPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae},imm8 + EVEX.NDS.512.66.0F3A.W0 54 /r ib + + AVX512F + + Fix up elements of float32 vector in zmm2 using int32 vector table in zmm3/m512/m32bcst, combine with preserved elements from zmm1, and store the result in zmm1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VFIXUPIMMSD--Fix Up Special Scalar Float64 Value. + + VFIXUPIMMSD + xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W1 55 /r ib + + AVX512F + + Fix up a float64 number in the low quadword element of xmm2 using scalar int32 table in xmm3/m64 and store the result in xmm1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VFIXUPIMMSS--Fix Up Special Scalar Float32 Value. + + VFIXUPIMMSS + xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W0 55 /r ib + + AVX512F + + Fix up a float32 number in the low doubleword element in xmm2 using scalar int32 table in xmm3/m32 and store the result in xmm1. 
+ + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VFMADD132PD/VFMADD213PD/VFMADD231PD--Fused Multiply-Add of Packed Double-Precision Floating-Point Values. + + VFMADD132PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 98 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, add to xmm2 and put result in xmm1. + + + VFMADD213PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 A8 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, add to xmm3/mem and put result in xmm1. + + + VFMADD231PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 B8 /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, add to xmm1 and put result in xmm1. + + + VFMADD132PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 98 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, add to ymm2 and put result in ymm1. + + + VFMADD213PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 A8 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, add to ymm3/mem and put result in ymm1. + + + VFMADD231PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 B8 /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, add to ymm1 and put result in ymm1. + + + VFMADD132PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 98 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, add to xmm2 and put result in xmm1. + + + VFMADD213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 A8 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, add to xmm3/m128/m64bcst and put result in xmm1. + + + VFMADD231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 B8 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, add to xmm1 and put result in xmm1. + + + VFMADD132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 98 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, add to ymm2 and put result in ymm1. + + + VFMADD213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 A8 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, add to ymm3/m256/m64bcst and put result in ymm1. + + + VFMADD231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 B8 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, add to ymm1 and put result in ymm1. + + + VFMADD132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 98 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, add to zmm2 and put result in zmm1. + + + VFMADD213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 A8 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm2, add to zmm3/m512/m64bcst and put result in zmm1. + + + VFMADD231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 B8 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, add to zmm1 and put result in zmm1. 
+ + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADD132PS/VFMADD213PS/VFMADD231PS--Fused Multiply-Add of Packed Single-Precision Floating-Point Values. + + VFMADD132PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 98 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, add to xmm2 and put result in xmm1. + + + VFMADD213PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 A8 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, add to xmm3/mem and put result in xmm1. + + + VFMADD231PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 B8 /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, add to xmm1 and put result in xmm1. + + + VFMADD132PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 98 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, add to ymm2 and put result in ymm1. + + + VFMADD213PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 A8 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, add to ymm3/mem and put result in ymm1. + + + VFMADD231PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 B8 /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, add to ymm1 and put result in ymm1. + + + VFMADD132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 98 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, add to xmm2 and put result in xmm1. + + + VFMADD213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 A8 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, add to xmm3/m128/m32bcst and put result in xmm1. + + + VFMADD231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 B8 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, add to xmm1 and put result in xmm1. + + + VFMADD132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 98 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, add to ymm2 and put result in ymm1. + + + VFMADD213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 A8 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, add to ymm3/m256/m32bcst and put result in ymm1. + + + VFMADD231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 B8 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, add to ymm1 and put result in ymm1. + + + VFMADD132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 98 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, add to zmm2 and put result in zmm1. + + + VFMADD213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 A8 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, add to zmm3/m512/m32bcst and put result in zmm1. + + + VFMADD231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 B8 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, add to zmm1 and put result in zmm1. 
+ + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADD132SD/VFMADD213SD/VFMADD231SD--Fused Multiply-Add of Scalar Double-Precision Floating-Point Values. + + VFMADD132SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 99 /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, add to xmm2 and put result in xmm1. + + + VFMADD213SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 A9 /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, add to xmm3/m64 and put result in xmm1. + + + VFMADD231SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 B9 /r + + FMA + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, add to xmm1 and put result in xmm1. + + + VFMADD132SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 99 /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, add to xmm2 and put result in xmm1. + + + VFMADD213SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 A9 /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, add to xmm3/m64 and put result in xmm1. + + + VFMADD231SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 B9 /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, add to xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADD132SS/VFMADD213SS/VFMADD231SS--Fused Multiply-Add of Scalar Single-Precision Floating-Point Values. + + VFMADD132SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 99 /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, add to xmm2 and put result in xmm1. + + + VFMADD213SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 A9 /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, add to xmm3/m32 and put result in xmm1. + + + VFMADD231SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 B9 /r + + FMA + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, add to xmm1 and put result in xmm1. + + + VFMADD132SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 99 /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, add to xmm2 and put result in xmm1. + + + VFMADD213SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 A9 /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, add to xmm3/m32 and put result in xmm1. + + + VFMADD231SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 B9 /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, add to xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADDSUB132PD/VFMADDSUB213PD/VFMADDSUB231PD--Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values. + + VFMADDSUB132PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 96 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, add/subtract elements in xmm2 and put result in xmm1. 
+ + + VFMADDSUB213PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 A6 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/mem and put result in xmm1. + + + VFMADDSUB231PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 B6 /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, add/subtract elements in xmm1 and put result in xmm1. + + + VFMADDSUB132PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 96 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, add/subtract elements in ymm2 and put result in ymm1. + + + VFMADDSUB213PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 A6 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/mem and put result in ymm1. + + + VFMADDSUB231PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 B6 /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, add/subtract elements in ymm1 and put result in ymm1. + + + VFMADDSUB213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 A6 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/m128/m64bcst and put result in xmm1 subject to writemask k1. + + + VFMADDSUB231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 B6 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, add/subtract elements in xmm1 and put result in xmm1 subject to writemask k1. + + + VFMADDSUB132PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 96 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, add/subtract elements in xmm2 and put result in xmm1 subject to writemask k1. + + + VFMADDSUB213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 A6 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/m256/m64bcst and put result in ymm1 subject to writemask k1. + + + VFMADDSUB231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 B6 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, add/subtract elements in ymm1 and put result in ymm1 subject to writemask k1. + + + VFMADDSUB132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 96 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, add/subtract elements in ymm2 and put result in ymm1 subject to writemask k1. + + + VFMADDSUB213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 A6 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1and zmm2, add/subtract elements in zmm3/m512/m64bcst and put result in zmm1 subject to writemask k1. + + + VFMADDSUB231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 B6 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, add/subtract elements in zmm1 and put result in zmm1 subject to writemask k1. 
+ + + VFMADDSUB132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 96 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, add/subtract elements in zmm2 and put result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADDSUB132PS/VFMADDSUB213PS/VFMADDSUB231PS--Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values. + + VFMADDSUB132PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 96 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, add/subtract elements in xmm2 and put result in xmm1. + + + VFMADDSUB213PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 A6 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/mem and put result in xmm1. + + + VFMADDSUB231PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 B6 /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, add/subtract elements in xmm1 and put result in xmm1. + + + VFMADDSUB132PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 96 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, add/subtract elements in ymm2 and put result in ymm1. + + + VFMADDSUB213PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 A6 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/mem and put result in ymm1. + + + VFMADDSUB231PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 B6 /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, add/subtract elements in ymm1 and put result in ymm1. + + + VFMADDSUB213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 A6 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/m128/m32bcst and put result in xmm1 subject to writemask k1. + + + VFMADDSUB231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 B6 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, add/subtract elements in xmm1 and put result in xmm1 subject to writemask k1. + + + VFMADDSUB132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 96 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, add/subtract elements in zmm2 and put result in xmm1 subject to writemask k1. + + + VFMADDSUB213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 A6 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/m256/m32bcst and put result in ymm1 subject to writemask k1. + + + VFMADDSUB231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 B6 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, add/subtract elements in ymm1 and put result in ymm1 subject to writemask k1. + + + VFMADDSUB132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 96 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, add/subtract elements in ymm2 and put result in ymm1 subject to writemask k1. 
+ + + VFMADDSUB213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 A6 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, add/subtract elements in zmm3/m512/m32bcst and put result in zmm1 subject to writemask k1. + + + VFMADDSUB231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 B6 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, add/subtract elements in zmm1 and put result in zmm1 subject to writemask k1. + + + VFMADDSUB132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 96 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, add/subtract elements in zmm2 and put result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUBADD132PD/VFMSUBADD213PD/VFMSUBADD231PD--Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values. + + VFMSUBADD132PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 97 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, subtract/add elements in xmm2 and put result in xmm1. + + + VFMSUBADD213PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 A7 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/mem and put result in xmm1. + + + VFMSUBADD231PD + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W1 B7 /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, subtract/add elements in xmm1 and put result in xmm1. + + + VFMSUBADD132PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 97 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, subtract/add elements in ymm2 and put result in ymm1. + + + VFMSUBADD213PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 A7 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/mem and put result in ymm1. + + + VFMSUBADD231PD + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W1 B7 /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, subtract/add elements in ymm1 and put result in ymm1. + + + VFMSUBADD132PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 97 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, subtract/add elements in xmm2 and put result in xmm1 subject to writemask k1. + + + VFMSUBADD213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 A7 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/m128/m64bcst and put result in xmm1 subject to writemask k1. + + + VFMSUBADD231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 B7 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, subtract/add elements in xmm1 and put result in xmm1 subject to writemask k1. + + + VFMSUBADD132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 97 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, subtract/add elements in ymm2 and put result in ymm1 subject to writemask k1. 
+ + + VFMSUBADD213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 A7 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/m256/m64bcst and put result in ymm1 subject to writemask k1. + + + VFMSUBADD231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 B7 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, subtract/add elements in ymm1 and put result in ymm1 subject to writemask k1. + + + VFMSUBADD132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 97 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, subtract/add elements in zmm2 and put result in zmm1 subject to writemask k1. + + + VFMSUBADD213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 A7 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm2, subtract/add elements in zmm3/m512/m64bcst and put result in zmm1 subject to writemask k1. + + + VFMSUBADD231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.DDS.512.66.0F38.W1 B7 /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, subtract/add elements in zmm1 and put result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUBADD132PS/VFMSUBADD213PS/VFMSUBADD231PS--Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values. + + VFMSUBADD132PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 97 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, subtract/add elements in xmm2 and put result in xmm1. + + + VFMSUBADD213PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 A7 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/mem and put result in xmm1. + + + VFMSUBADD231PS + xmm1,xmm2,xmm3/m128 + VEX.DDS.128.66.0F38.W0 B7 /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, subtract/add elements in xmm1 and put result in xmm1. + + + VFMSUBADD132PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 97 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, subtract/add elements in ymm2 and put result in ymm1. + + + VFMSUBADD213PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 A7 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/mem and put result in ymm1. + + + VFMSUBADD231PS + ymm1,ymm2,ymm3/m256 + VEX.DDS.256.66.0F38.W0 B7 /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, subtract/add elements in ymm1 and put result in ymm1. + + + VFMSUBADD132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 97 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, subtract/add elements in xmm2 and put result in xmm1 subject to writemask k1. + + + VFMSUBADD213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 A7 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/m128/m32bcst and put result in xmm1 subject to writemask k1. 
+ + + VFMSUBADD231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 B7 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, subtract/add elements in xmm1 and put result in xmm1 subject to writemask k1. + + + VFMSUBADD132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 97 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, subtract/add elements in ymm2 and put result in ymm1 subject to writemask k1. + + + VFMSUBADD213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 A7 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/m256/m32bcst and put result in ymm1 subject to writemask k1. + + + VFMSUBADD231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 B7 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, subtract/add elements in ymm1 and put result in ymm1 subject to writemask k1. + + + VFMSUBADD132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 97 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, subtract/add elements in zmm2 and put result in zmm1 subject to writemask k1. + + + VFMSUBADD213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 A7 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, subtract/add elements in zmm3/m512/m32bcst and put result in zmm1 subject to writemask k1. + + + VFMSUBADD231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.DDS.512.66.0F38.W0 B7 /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, subtract/add elements in zmm1 and put result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132PD/VFMSUB213PD/VFMSUB231PD--Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values. + + VFMSUB132PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 9A /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, subtract xmm2 and put result in xmm1. + + + VFMSUB213PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 AA /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract xmm3/mem and put result in xmm1. + + + VFMSUB231PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 BA /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, subtract xmm1 and put result in xmm1. + + + VFMSUB132PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 9A /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, subtract ymm2 and put result in ymm1. + + + VFMSUB213PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 AA /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract ymm3/mem and put result in ymm1. + + + VFMSUB231PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 BA /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, subtract ymm1 and put result in ymm1.S. 
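The VFMADDSUB*/VFMSUBADD* entries above only say "add/subtract" or "subtract/add elements"; the detail is that after the multiply, the two operations alternate per lane. A rough per-lane model follows; the lane split used (FMADDSUB adds in odd-indexed lanes and subtracts in even-indexed lanes, FMSUBADD the reverse) follows the usual SDM convention and should be treated as an assumption here, since the summaries above do not spell it out:

def fmaddsub(a, b, c):
    # even lanes: a*b - c, odd lanes: a*b + c (assumed split, see note above)
    return [x * y + z if i % 2 else x * y - z
            for i, (x, y, z) in enumerate(zip(a, b, c))]

def fmsubadd(a, b, c):
    # even lanes: a*b + c, odd lanes: a*b - c
    return [x * y - z if i % 2 else x * y + z
            for i, (x, y, z) in enumerate(zip(a, b, c))]

print(fmaddsub([1.0, 1.0], [2.0, 2.0], [0.5, 0.5]))   # [1.5, 2.5]
print(fmsubadd([1.0, 1.0], [2.0, 2.0], [0.5, 0.5]))   # [2.5, 1.5]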
+ + + VFMSUB132PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 9A /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, subtract xmm2 and put result in xmm1 subject to writemask k1. + + + VFMSUB213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 AA /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract xmm3/m128/m64bcst and put result in xmm1 subject to writemask k1. + + + VFMSUB231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 BA /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, subtract xmm1 and put result in xmm1 subject to writemask k1. + + + VFMSUB132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 9A /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, subtract ymm2 and put result in ymm1 subject to writemask k1. + + + VFMSUB213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 AA /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract ymm3/m256/m64bcst and put result in ymm1 subject to writemask k1. + + + VFMSUB231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 BA /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, subtract ymm1 and put result in ymm1 subject to writemask k1. + + + VFMSUB132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 9A /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, subtract zmm2 and put result in zmm1 subject to writemask k1. + + + VFMSUB213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 AA /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm2, subtract zmm3/m512/m64bcst and put result in zmm1 subject to writemask k1. + + + VFMSUB231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 BA /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, subtract zmm1 and put result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132PS/VFMSUB213PS/VFMSUB231PS--Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values. + + VFMSUB132PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 9A /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, subtract xmm2 and put result in xmm1. + + + VFMSUB213PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 AA /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract xmm3/mem and put result in xmm1. + + + VFMSUB231PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 BA /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, subtract xmm1 and put result in xmm1. + + + VFMSUB132PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 9A /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, subtract ymm2 and put result in ymm1. 
+ + + VFMSUB213PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 AA /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract ymm3/mem and put result in ymm1. + + + VFMSUB231PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 BA /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, subtract ymm1 and put result in ymm1. + + + VFMSUB132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 9A /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, subtract xmm2 and put result in xmm1. + + + VFMSUB213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 AA /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract xmm3/m128/m32bcst and put result in xmm1. + + + VFMSUB231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 BA /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, subtract xmm1 and put result in xmm1. + + + VFMSUB132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 9A /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, subtract ymm2 and put result in ymm1. + + + VFMSUB213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 AA /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract ymm3/m256/m32bcst and put result in ymm1. + + + VFMSUB231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 BA /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, subtract ymm1 and put result in ymm1. + + + VFMSUB132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 9A /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, subtract zmm2 and put result in zmm1. + + + VFMSUB213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 AA /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, subtract zmm3/m512/m32bcst and put result in zmm1. + + + VFMSUB231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 BA /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, subtract zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132SD/VFMSUB213SD/VFMSUB231SD--Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values. + + VFMSUB132SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 9B /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, subtract xmm2 and put result in xmm1. + + + VFMSUB213SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 AB /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, subtract xmm3/m64 and put result in xmm1. + + + VFMSUB231SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 BB /r + + FMA + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, subtract xmm1 and put result in xmm1. 
+ + + VFMSUB132SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 9B /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, subtract xmm2 and put result in xmm1. + + + VFMSUB213SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 AB /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, subtract xmm3/m64 and put result in xmm1. + + + VFMSUB231SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 BB /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, subtract xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132SS/VFMSUB213SS/VFMSUB231SS--Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values. + + VFMSUB132SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 9B /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, subtract xmm2 and put result in xmm1. + + + VFMSUB213SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 AB /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, subtract xmm3/m32 and put result in xmm1. + + + VFMSUB231SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 BB /r + + FMA + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, subtract xmm1 and put result in xmm1. + + + VFMSUB132SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 9B /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, subtract xmm2 and put result in xmm1. + + + VFMSUB213SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 AB /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, subtract xmm3/m32 and put result in xmm1. + + + VFMSUB231SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 BB /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, subtract xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132PD/VFNMADD213PD/VFNMADD231PD--Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values. + + VFNMADD132PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 9C /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 AC /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/mem and put result in xmm1. + + + VFNMADD231PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 BC /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 9C /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and add to ymm2 and put result in ymm1. + + + VFNMADD213PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 AC /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/mem and put result in ymm1. 
+ + + VFNMADD231PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 BC /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and add to ymm1 and put result in ymm1. + + + VFNMADD132PD + xmm0 {k1}{z},xmm1,xmm2/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 9C /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 AC /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/m128/m64bcst and put result in xmm1. + + + VFNMADD231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 BC /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 9C /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, negate the multiplication result and add to ymm2 and put result in ymm1. + + + VFNMADD213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 AC /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/m256/m64bcst and put result in ymm1. + + + VFNMADD231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 BC /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, negate the multiplication result and add to ymm1 and put result in ymm1. + + + VFNMADD132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 9C /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, negate the multiplication result and add to zmm2 and put result in zmm1. + + + VFNMADD213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 AC /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm2, negate the multiplication result and add to zmm3/m512/m64bcst and put result in zmm1. + + + VFNMADD231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 BC /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, negate the multiplication result and add to zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132PS/VFNMADD213PS/VFNMADD231PS--Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values. + + VFNMADD132PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 9C /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 AC /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/mem and put result in xmm1. 
+ + + VFNMADD231PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 BC /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 9C /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and add to ymm2 and put result in ymm1. + + + VFNMADD213PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 AC /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/mem and put result in ymm1. + + + VFNMADD231PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 BC /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and add to ymm1 and put result in ymm1. + + + VFNMADD132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 9C /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 AC /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/m128/m32bcst and put result in xmm1. + + + VFNMADD231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 BC /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 9C /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, negate the multiplication result and add to ymm2 and put result in ymm1. + + + VFNMADD213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 AC /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/m256/m32bcst and put result in ymm1. + + + VFNMADD231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 BC /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, negate the multiplication result and add to ymm1 and put result in ymm1. + + + VFNMADD132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 9C /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, negate the multiplication result and add to zmm2 and put result in zmm1. + + + VFNMADD213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 AC /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, negate the multiplication result and add to zmm3/m512/m32bcst and put result in zmm1. + + + VFNMADD231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 BC /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, negate the multiplication result and add to zmm1 and put result in zmm1. 
+ + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132SD/VFNMADD213SD/VFNMADD231SD--Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values. + + VFNMADD132SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 9D /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/mem, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 AD /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/mem and put result in xmm1. + + + VFNMADD231SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 BD /r + + FMA + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/mem, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 9D /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 AD /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/m64 and put result in xmm1. + + + VFNMADD231SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 BD /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, negate the multiplication result and add to xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132SS/VFNMADD213SS/VFNMADD231SS--Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values. + + VFNMADD132SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 9D /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 AD /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/m32 and put result in xmm1. + + + VFNMADD231SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 BD /r + + FMA + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and add to xmm1 and put result in xmm1. + + + VFNMADD132SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 9D /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and add to xmm2 and put result in xmm1. + + + VFNMADD213SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 AD /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/m32 and put result in xmm1. + + + VFNMADD231SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 BD /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and add to xmm1 and put result in xmm1. 
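Every VEX/EVEX opcode cell in these entries has the same machine-friendly shape: the prefix kind, a dot-separated field list (operand form such as NDS/DDS, vector length or LIG, the mandatory prefix 66/F2/F3, the opcode map 0F/0F38/0F3A, and the W bit), then the opcode byte, an optional /r (or /vsib for the gather rows) and an optional ib. That regularity is what a generator like genc.py can rely on; the splitter below is only a hypothetical illustration, not the project's actual parser, and it handles just the VEX/EVEX rows (legacy rows such as the INSERTPS one at the end of this table look different):

import re

OPC_RE = re.compile(
    r"^(?P<prefix>VEX|EVEX)\.(?P<fields>[A-Z0-9.]+)\s+"
    r"(?P<opcode>[0-9A-F]{2})\s*(?P<modrm>/r|/vsib)?\s*(?P<imm>ib)?"
)

def parse_opc(text):
    m = OPC_RE.match(text)
    if m is None:
        return None   # not a VEX/EVEX row
    return {
        "prefix": m.group("prefix"),
        "fields": m.group("fields").split("."),   # e.g. ['DDS', 'LIG', '66', '0F38', 'W1']
        "opcode": int(m.group("opcode"), 16),
        "modrm": m.group("modrm") is not None,
        "imm8": m.group("imm") is not None,
    }

print(parse_opc("EVEX.DDS.LIG.66.0F38.W1 9D /r"))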
+ + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132PD/VFNMSUB213PD/VFNMSUB231PD--Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values. + + VFNMSUB132PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 9E /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 AE /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/mem and put result in xmm1. + + + VFNMSUB231PD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 BE /r + + FMA + + Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + VFNMSUB132PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 9E /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and subtract ymm2 and put result in ymm1. + + + VFNMSUB213PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 AE /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/mem and put result in ymm1. + + + VFNMSUB231PD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 BE /r + + FMA + + Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and subtract ymm1 and put result in ymm1. + + + VFNMSUB132PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 9E /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 AE /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m128/m64bcst and put result in xmm1. + + + VFNMSUB231PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 BE /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + VFNMSUB132PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 9E /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, negate the multiplication result and subtract ymm2 and put result in ymm1. + + + VFNMSUB213PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 AE /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/m256/m64bcst and put result in ymm1. + + + VFNMSUB231PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 BE /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, negate the multiplication result and subtract ymm1 and put result in ymm1. 
+ + + VFNMSUB132PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 9E /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, negate the multiplication result and subtract zmm2 and put result in zmm1. + + + VFNMSUB213PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 AE /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm1 and zmm2, negate the multiplication result and subtract zmm3/m512/m64bcst and put result in zmm1. + + + VFNMSUB231PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 BE /r + + AVX512F + + Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, negate the multiplication result and subtract zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132PS/VFNMSUB213PS/VFNMSUB231PS--Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values. + + VFNMSUB132PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 9E /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 AE /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/mem and put result in xmm1. + + + VFNMSUB231PS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 BE /r + + FMA + + Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + VFNMSUB132PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 9E /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and subtract ymm2 and put result in ymm1. + + + VFNMSUB213PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 AE /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/mem and put result in ymm1. + + + VFNMSUB231PS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 BE /r + + FMA + + Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and subtract ymm1 and put result in ymm1. + + + VFNMSUB132PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 9E /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 AE /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m128/m32bcst and put result in xmm1. + + + VFNMSUB231PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 BE /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, negate the multiplication result subtract add to xmm1 and put result in xmm1. 
+ + + VFNMSUB132PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 9E /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, negate the multiplication result and subtract ymm2 and put result in ymm1. + + + VFNMSUB213PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 AE /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/m256/m32bcst and put result in ymm1. + + + VFNMSUB231PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 BE /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, negate the multiplication result subtract add to ymm1 and put result in ymm1. + + + VFNMSUB132PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 9E /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, negate the multiplication result and subtract zmm2 and put result in zmm1. + + + VFNMSUB213PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 AE /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm1 and zmm2, negate the multiplication result and subtract zmm3/m512/m32bcst and put result in zmm1. + + + VFNMSUB231PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 BE /r + + AVX512F + + Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, negate the multiplication result subtract add to zmm1 and put result in zmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132SD/VFNMSUB213SD/VFNMSUB231SD--Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values. + + VFNMSUB132SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 9F /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/mem, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 AF /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/mem and put result in xmm1. + + + VFNMSUB231SD + xmm1,xmm2,xmm3/m64 + VEX.DDS.LIG.66.0F38.W1 BF /r + + FMA + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/mem, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + VFNMSUB132SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 9F /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 AF /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m64 and put result in xmm1. + + + VFNMSUB231SD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.DDS.LIG.66.0F38.W1 BF /r + + AVX512F + + Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, negate the multiplication result and subtract xmm1 and put result in xmm1. 
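Across the fused families listed above, only the placement of the negation changes; the multiply and the accumulate are otherwise identical. A compact scalar summary (again ignoring the single rounding of real fused hardware):

def fmadd(a, b, c):   return  (a * b) + c   # VFMADD* entries
def fmsub(a, b, c):   return  (a * b) - c   # VFMSUB* entries
def fnmadd(a, b, c):  return -(a * b) + c   # VFNMADD*: negate the product, then add
def fnmsub(a, b, c):  return -(a * b) - c   # VFNMSUB*: negate the product, then subtract

# with a=2, b=3, c=10 the four variants give 16, -4, 4, -16
print(fmadd(2.0, 3.0, 10.0), fmsub(2.0, 3.0, 10.0),
      fnmadd(2.0, 3.0, 10.0), fnmsub(2.0, 3.0, 10.0))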
+ + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132SS/VFNMSUB213SS/VFNMSUB231SS--Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values. + + VFNMSUB132SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 9F /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 AF /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m32 and put result in xmm1. + + + VFNMSUB231SS + xmm1,xmm2,xmm3/m32 + VEX.DDS.LIG.66.0F38.W0 BF /r + + FMA + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + VFNMSUB132SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 9F /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and subtract xmm2 and put result in xmm1. + + + VFNMSUB213SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 AF /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m32 and put result in xmm1. + + + VFNMSUB231SS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.DDS.LIG.66.0F38.W0 BF /r + + AVX512F + + Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and subtract xmm1 and put result in xmm1. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFPCLASSPD--Tests Types Of a Packed Float64 Values. + + VFPCLASSPD + k2 {k1},xmm2/m128/m64bcst,imm8 + EVEX.128.66.0F3A.W1 66 /r ib + + AVX512VL + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + VFPCLASSPD + k2 {k1},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 66 /r ib + + AVX512VL + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + VFPCLASSPD + k2 {k1},zmm2/m512/m64bcst,imm8 + EVEX.512.66.0F3A.W1 66 /r ib + + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VFPCLASSPS--Tests Types Of a Packed Float32 Values. + + VFPCLASSPS + k2 {k1},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F3A.W0 66 /r ib + + AVX512VL + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. 
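The VFPCLASS* entries above repeat the same description: test each element against a fixed list of categories, select some of those tests through imm8, and OR the selected results into one mask bit per element. The sketch below models that with category names instead of imm8 bit positions, because the bit assignment is not given in these summaries; the constant used for the smallest normal double is standard binary64 background, not something taken from the table:

import math

SMALLEST_NORMAL = 2.2250738585072014e-308   # smallest normal binary64 value

def fp_category(x):
    # the categories named in the VFPCLASS descriptions above
    if math.isnan(x):
        return "nan"
    if math.isinf(x):
        return "+inf" if x > 0 else "-inf"
    if x == 0.0:
        return "+0" if math.copysign(1.0, x) > 0 else "-0"
    if abs(x) < SMALLEST_NORMAL:
        return "denormal"
    return "finite-negative" if x < 0 else "other"

def vfpclass(values, selected):
    # one mask bit per element, set when the element falls in any selected category
    return [fp_category(v) in selected for v in values]

print(vfpclass([float("nan"), -0.0, 1.5, -3.0], {"nan", "-0", "finite-negative"}))
# [True, True, False, True]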
+ + + VFPCLASSPS + k2 {k1},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F3A.W0 66 /r ib + + AVX512VL + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + VFPCLASSPS + k2 {k1},zmm2/m512/m32bcst,imm8 + EVEX.512.66.0F3A.W0 66 /r ib + + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VFPCLASSSD--Tests Types Of a Scalar Float64 Values. + + VFPCLASSSD + k2 {k1},xmm2/m64,imm8 + EVEX.LIG.66.0F3A.W1 67 /r ib + + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VFPCLASSSS--Tests Types Of a Scalar Float32 Values. + + VFPCLASSSS + k2 {k1},xmm2/m32,imm8 + EVEX.LIG.66.0F3A.W0 67 /r + + AVX512DQ + + Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPGATHERDD/VPGATHERDQ--Gather Packed Dword, Packed Qword with Signed Dword Indices. + + VPGATHERDD + xmm1 {k1},vm32x + EVEX.128.66.0F38.W0 90 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERDD + ymm1 {k1},vm32y + EVEX.256.66.0F38.W0 90 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERDD + zmm1 {k1},vm32z + EVEX.512.66.0F38.W0 90 /vsib + + AVX512F + + Using signed dword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERDQ + xmm1 {k1},vm32x + EVEX.128.66.0F38.W1 90 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + VPGATHERDQ + ymm1 {k1},vm32x + EVEX.256.66.0F38.W1 90 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + VPGATHERDQ + zmm1 {k1},vm32y + EVEX.512.66.0F38.W1 90 /vsib + + AVX512F + + Using signed dword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + ModRM:reg(w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + + + + VPGATHERQD/VPGATHERQQ--Gather Packed Dword, Packed Qword with Signed Qword Indices. + + VPGATHERQD + xmm1 {k1},vm64x + EVEX.128.66.0F38.W0 91 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERQD + xmm1 {k1},vm64y + EVEX.256.66.0F38.W0 91 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather dword values from memory using writemask k1 for merging-masking. 
+ + + VPGATHERQD + ymm1 {k1},vm64z + EVEX.512.66.0F38.W0 91 /vsib + + AVX512F + + Using signed qword indices, gather dword values from memory using writemask k1 for merging-masking. + + + VPGATHERQQ + xmm1 {k1},vm64x + EVEX.128.66.0F38.W1 91 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + VPGATHERQQ + ymm1 {k1},vm64y + EVEX.256.66.0F38.W1 91 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + VPGATHERQQ + zmm1 {k1},vm64z + EVEX.512.66.0F38.W1 91 /vsib + + AVX512F + + Using signed qword indices, gather quadword values from memory using writemask k1 for merging-masking. + + + ModRM:reg(w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + + + + VGATHERDPS/VGATHERDPD--Gather Packed Single, Packed Double with Signed Dword. + + VGATHERDPS + xmm1 {k1},vm32x + EVEX.128.66.0F38.W0 92 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather single-precision floatingpoint values from memory using k1 as completion mask. + + + VGATHERDPS + ymm1 {k1},vm32y + EVEX.256.66.0F38.W0 92 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather single-precision floatingpoint values from memory using k1 as completion mask. + + + VGATHERDPS + zmm1 {k1},vm32z + EVEX.512.66.0F38.W0 92 /vsib + + AVX512F + + Using signed dword indices, gather single-precision floatingpoint values from memory using k1 as completion mask. + + + VGATHERDPD + xmm1 {k1},vm32x + EVEX.128.66.0F38.W1 92 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather float64 vector into float64 vector xmm1 using k1 as completion mask. + + + VGATHERDPD + ymm1 {k1},vm32x + EVEX.256.66.0F38.W1 92 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, gather float64 vector into float64 vector ymm1 using k1 as completion mask. + + + VGATHERDPD + zmm1 {k1},vm32y + EVEX.512.66.0F38.W1 92 /vsib + + AVX512F + + Using signed dword indices, gather float64 vector into float64 vector zmm1 using k1 as completion mask. + + + ModRM:reg(w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + + + + VGATHERQPS/VGATHERQPD--Gather Packed Single, Packed Double with Signed Qword Indices. + + VGATHERQPS + xmm1 {k1},vm64x + EVEX.128.66.0F38.W0 93 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather single-precision floating-point values from memory using k1 as completion mask. + + + VGATHERQPS + xmm1 {k1},vm64y + EVEX.256.66.0F38.W0 93 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather single-precision floating-point values from memory using k1 as completion mask. + + + VGATHERQPS + ymm1 {k1},vm64z + EVEX.512.66.0F38.W0 93 /vsib + + AVX512F + + Using signed qword indices, gather single-precision floating-point values from memory using k1 as completion mask. + + + VGATHERQPD + xmm1 {k1},vm64x + EVEX.128.66.0F38.W1 93 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather float64 vector into float64 vector xmm1 using k1 as completion mask. + + + VGATHERQPD + ymm1 {k1},vm64y + EVEX.256.66.0F38.W1 93 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, gather float64 vector into float64 vector ymm1 using k1 as completion mask. + + + VGATHERQPD + zmm1 {k1},vm64z + EVEX.512.66.0F38.W1 93 /vsib + + AVX512F + + Using signed qword indices, gather float64 vector into float64 vector zmm1 using k1 as completion mask. 
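All of the gather entries above behave the same way: a vector of signed indices (the VSIB operand) picks scattered elements out of memory, scaled from a base address, and the writemask k1 decides which destination lanes are actually written; unselected lanes keep their old contents (the merging-masking the descriptions mention). A hypothetical model with a flat list standing in for memory:

def gather(memory, base, indices, scale, mask, dest):
    # lanes with a clear mask bit keep their previous value (merging-masking)
    out = list(dest)
    for lane, (idx, m) in enumerate(zip(indices, mask)):
        if m:
            out[lane] = memory[base + idx * scale]
    return out

mem = list(range(100))
print(gather(mem, base=10, indices=[0, 3, -2, 7], scale=2,
             mask=[1, 0, 1, 1], dest=[9, 9, 9, 9]))
# [10, 9, 6, 24] -- note the signed index -2 reaching below the base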
+ + + ModRM:reg(w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + + + + VGETEXPPD--Convert Exponents of Packed DP FP Values to DP FP Values. + + VGETEXPPD + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 42 /r + + AVX512VL + AVX512F + + Convert the exponent of packed double-precision floating-point values in the source operand to DP FP results representing unbiased integer exponents and stores the results in the destination register. + + + VGETEXPPD + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 42 /r + + AVX512VL + AVX512F + + Convert the exponent of packed double-precision floating-point values in the source operand to DP FP results representing unbiased integer exponents and stores the results in the destination register. + + + VGETEXPPD + zmm1 {k1}{z},zmm2/m512/m64bcst{sae} + EVEX.512.66.0F38.W1 42 /r + + AVX512F + + Convert the exponent of packed double-precision floating-point values in the source operand to DP FP results representing unbiased integer exponents and stores the results in the destination under writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VGETEXPPS--Convert Exponents of Packed SP FP Values to SP FP Values. + + VGETEXPPS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 42 /r + + AVX512VL + AVX512F + + Convert the exponent of packed single-precision floating-point values in the source operand to SP FP results representing unbiased integer exponents and stores the results in the destination register. + + + VGETEXPPS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 42 /r + + AVX512VL + AVX512F + + Convert the exponent of packed single-precision floating-point values in the source operand to SP FP results representing unbiased integer exponents and stores the results in the destination register. + + + VGETEXPPS + zmm1 {k1}{z},zmm2/m512/m32bcst{sae} + EVEX.512.66.0F38.W0 42 /r + + AVX512F + + Convert the exponent of packed single-precision floating-point values in the source operand to SP FP results representing unbiased integer exponents and stores the results in the destination register. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VGETEXPSD--Convert Exponents of Scalar DP FP Values to DP FP Value. + + VGETEXPSD + xmm1 {k1}{z},xmm2,xmm3/m64{sae} + EVEX.NDS.LIG.66.0F38.W1 43 /r + + AVX512F + + Convert the biased exponent (bits 62:52) of the low doubleprecision floating-point value in xmm3/m64 to a DP FP value representing unbiased integer exponent. Stores the result to the low 64-bit of xmm1 under the writemask k1 and merge with the other elements of xmm2. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VGETEXPSS--Convert Exponents of Scalar SP FP Values to SP FP Value. + + VGETEXPSS + xmm1 {k1}{z},xmm2,xmm3/m32{sae} + EVEX.NDS.LIG.66.0F38.W0 43 /r + + AVX512F + + Convert the biased exponent (bits 30:23) of the low singleprecision floating-point value in xmm3/m32 to a SP FP value representing unbiased integer exponent. Stores the result to xmm1 under the writemask k1 and merge with the other elements of xmm2. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VGETMANTPD--Extract Float64 Vector of Normalized Mantissas from Float64 Vector. + + VGETMANTPD + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.128.66.0F3A.W1 26 /r ib + + AVX512VL + AVX512F + + Get Normalized Mantissa from float64 vector xmm2/m128/m64bcst and store the result in xmm1, using imm8 for sign control and mantissa interval normalization, under writemask. 
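The VGETEXPSD entry above states the operation exactly: take the biased exponent field, bits 62:52 of the double, remove the bias and hand it back as a floating-point value. A short sketch of that bit extraction; the bias of 1023 is standard binary64 background rather than something stated in the entry, and the special cases the real instruction has for zero, denormal, NaN and infinity inputs are not modelled:

import struct

def getexp_sd(x):
    # reinterpret the double as 64 bits, pull bits 62:52, subtract the bias
    bits = struct.unpack("<Q", struct.pack("<d", x))[0]
    biased = (bits >> 52) & 0x7FF
    return float(biased - 1023)

print(getexp_sd(8.0), getexp_sd(0.75))   # 3.0 -1.0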
+ + + VGETMANTPD + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 26 /r ib + + AVX512VL + AVX512F + + Get Normalized Mantissa from float64 vector ymm2/m256/m64bcst and store the result in ymm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + VGETMANTPD + zmm1 {k1}{z},zmm2/m512/m64bcst{sae},imm8 + EVEX.512.66.0F3A.W1 26 /r ib + + AVX512F + + Get Normalized Mantissa from float64 vector zmm2/m512/m64bcst and store the result in zmm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VGETMANTPS--Extract Float32 Vector of Normalized Mantissas from Float32 Vector. + + VGETMANTPS + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F3A.W0 26 /r ib + + AVX512VL + AVX512F + + Get normalized mantissa from float32 vector xmm2/m128/m32bcst and store the result in xmm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + VGETMANTPS + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F3A.W0 26 /r ib + + AVX512VL + AVX512F + + Get normalized mantissa from float32 vector ymm2/m256/m32bcst and store the result in ymm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + VGETMANTPS + zmm1 {k1}{z},zmm2/m512/m32bcst{sae},imm8 + EVEX.512.66.0F3A.W0 26 /r ib + + AVX512F + + Get normalized mantissa from float32 vector zmm2/m512/m32bcst and store the result in zmm1, using imm8 for sign control and mantissa interval normalization, under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VGETMANTSD--Extract Float64 of Normalized Mantissas from Float64 Scalar. + + VGETMANTSD + xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W1 27 /r ib + + AVX512F + + Extract the normalized mantissa of the low float64 element in xmm3/m64 using imm8 for sign control and mantissa interval normalization. Store the mantissa to xmm1 under the writemask k1 and merge with the other elements of xmm2. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VGETMANTSS--Extract Float32 Vector of Normalized Mantissa from Float32 Vector. + + VGETMANTSS + xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W0 27 /r ib + + AVX512F + + Extract the normalized mantissa from the low float32 element of xmm3/m32 using imm8 for sign control and mantissa interval normalization, store the mantissa to xmm1 under the writemask k1 and merge with the other elements of xmm2. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VINSERTF128/VINSERTF32x4/VINSERTF64x2/VINSERTF32x8/VINSERTF64x4--Insert Packed Floating-Point Values. + + VINSERTF128 + ymm1,ymm2,xmm3/m128,imm8 + VEX.NDS.256.66.0F3A.W0 18 /r ib + + AVX + + Insert 128 bits of packed floating-point values from xmm3/m128 and the remaining values from ymm2 into ymm1. + + + VINSERTF32X4 + ymm1 {k1}{z},ymm2,xmm3/m128,imm8 + EVEX.NDS.256.66.0F3A.W0 18 /r ib + + AVX512VL + AVX512F + + Insert 128 bits of packed single-precision floatingpoint values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1. + + + VINSERTF32X4 + zmm1 {k1}{z},zmm2,xmm3/m128,imm8 + EVEX.NDS.512.66.0F3A.W0 18 /r ib + + AVX512F + + Insert 128 bits of packed single-precision floatingpoint values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1. 
+ + + VINSERTF64X2 + ymm1 {k1}{z},ymm2,xmm3/m128,imm8 + EVEX.NDS.256.66.0F3A.W1 18 /r ib + + AVX512VL + AVX512DQ + + Insert 128 bits of packed double-precision floatingpoint values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1. + + + VINSERTF64X2 + zmm1 {k1}{z},zmm2,xmm3/m128,imm8 + EVEX.NDS.512.66.0F3A.W1 18 /r ib + + AVX512DQ + + Insert 128 bits of packed double-precision floatingpoint values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1. + + + VINSERTF32X8 + zmm1 {k1}{z},zmm2,ymm3/m256,imm8 + EVEX.NDS.512.66.0F3A.W0 1A /r ib + + AVX512DQ + + Insert 256 bits of packed single-precision floatingpoint values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1. + + + VINSERTF64X4 + zmm1 {k1}{z},zmm2,ymm3/m256,imm8 + EVEX.NDS.512.66.0F3A.W1 1A /r ib + + AVX512F + + Insert 256 bits of packed double-precision floatingpoint values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VINSERTI128/VINSERTI32x4/VINSERTI64x2/VINSERTI32x8/VINSERTI64x4--Insert Packed Integer Values. + + VINSERTI128 + ymm1,ymm2,xmm3/m128,imm8 + VEX.NDS.256.66.0F3A.W0 38 /r ib + + AVX2 + + Insert 128 bits of integer data from xmm3/m128 and the remaining values from ymm2 into ymm1. + + + VINSERTI32X4 + ymm1 {k1}{z},ymm2,xmm3/m128,imm8 + EVEX.NDS.256.66.0F3A.W0 38 /r ib + + AVX512VL + AVX512F + + Insert 128 bits of packed doubleword integer values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1. + + + VINSERTI32X4 + zmm1 {k1}{z},zmm2,xmm3/m128,imm8 + EVEX.NDS.512.66.0F3A.W0 38 /r ib + + AVX512F + + Insert 128 bits of packed doubleword integer values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1. + + + VINSERTI64X2 + ymm1 {k1}{z},ymm2,xmm3/m128,imm8 + EVEX.NDS.256.66.0F3A.W1 38 /r ib + + AVX512VL + AVX512DQ + + Insert 128 bits of packed quadword integer values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1. + + + VINSERTI64X2 + zmm1 {k1}{z},zmm2,xmm3/m128,imm8 + EVEX.NDS.512.66.0F3A.W1 38 /r ib + + AVX512DQ + + Insert 128 bits of packed quadword integer values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1. + + + VINSERTI32X8 + zmm1 {k1}{z},zmm2,ymm3/m256,imm8 + EVEX.NDS.512.66.0F3A.W0 3A /r ib + + AVX512DQ + + Insert 256 bits of packed doubleword integer values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1. + + + VINSERTI64X4 + zmm1 {k1}{z},zmm2,ymm3/m256,imm8 + EVEX.NDS.512.66.0F3A.W1 3A /r ib + + AVX512F + + Insert 256 bits of packed quadword integer values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + INSERTPS--Insert Scalar Single-Precision Floating-Point Value. + + INSERTPS + xmm1,xmm2/m32,imm8 + 66 0F 3A 21 /r ib + + SSE4_1 + + Insert a single-precision floating-point value selected by imm8 from xmm2/m32 into xmm1 at the specified destination element specified by imm8 and zero out destination elements in xmm1 as indicated in imm8. 
+ + + VINSERTPS + xmm1,xmm2,xmm3/m32,imm8 + VEX.NDS.128.66.0F3A.WIG 21 /r ib + + AVX + + Insert a single-precision floating-point value selected by imm8 from xmm3/m32 and merge with values in xmm2 at the specified destination element specified by imm8 and write out the result and zero out destination elements in xmm1 as indicated in imm8. + + + VINSERTPS + xmm1,xmm2,xmm3/m32,imm8 + EVEX.NDS.128.66.0F3A.W0 21 /r ib + + AVX512F + + Insert a single-precision floating-point value selected by imm8 from xmm3/m32 and merge with values in xmm2 at the specified destination element specified by imm8 and write out the result and zero out destination elements in xmm1 as indicated in imm8. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + Imm8 + + + + MAXPD--Maximum of Packed Double-Precision Floating-Point Values. + + MAXPD + xmm1,xmm2/m128 + 66 0F 5F /r + + SSE2 + + Return the maximum double-precision floating-point values between xmm1 and xmm2/m128. + + + VMAXPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5F /r + + AVX + + Return the maximum double-precision floating-point values between xmm2 and xmm3/m128. + + + VMAXPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5F /r + + AVX + + Return the maximum packed double-precision floating-point values between ymm2 and ymm3/m256. + + + VMAXPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 5F /r + + AVX512VL + AVX512F + + Return the maximum packed double-precision floating-point values between xmm2 and xmm3/m128/m64bcst and store result in xmm1 subject to writemask k1. + + + VMAXPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 5F /r + + AVX512VL + AVX512F + + Return the maximum packed double-precision floating-point values between ymm2 and ymm3/m256/m64bcst and store result in ymm1 subject to writemask k1. + + + VMAXPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae} + EVEX.NDS.512.66.0F.W1 5F /r + + AVX512F + + Return the maximum packed double-precision floating-point values between zmm2 and zmm3/m512/m64bcst and store result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MAXPS--Maximum of Packed Single-Precision Floating-Point Values. + + MAXPS + xmm1,xmm2/m128 + 0F 5F /r + + SSE + + Return the maximum single-precision floating-point values between xmm1 and xmm2/mem. + + + VMAXPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5F /r + + AVX + + Return the maximum single-precision floating-point values between xmm2 and xmm3/mem. + + + VMAXPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5F /r + + AVX + + Return the maximum single-precision floating-point values between ymm2 and ymm3/mem. + + + VMAXPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 5F /r + + AVX512VL + AVX512F + + Return the maximum packed single-precision floating-point values between xmm2 and xmm3/m128/m32bcst and store result in xmm1 subject to writemask k1. + + + VMAXPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 5F /r + + AVX512VL + AVX512F + + Return the maximum packed single-precision floating-point values between ymm2 and ymm3/m256/m32bcst and store result in ymm1 subject to writemask k1. 
+ + + VMAXPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae} + EVEX.NDS.512.0F.W0 5F /r + + AVX512F + + Return the maximum packed single-precision floating-point values between zmm2 and zmm3/m512/m32bcst and store result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MAXSD--Return Maximum Scalar Double-Precision Floating-Point Value. + + MAXSD + xmm1,xmm2/m64 + F2 0F 5F /r + + SSE2 + + Return the maximum scalar double-precision floating-point value between xmm2/m64 and xmm1. + + + VMAXSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 5F /r + + AVX + + Return the maximum scalar double-precision floating-point value between xmm3/m64 and xmm2. + + + VMAXSD + xmm1 {k1}{z},xmm2,xmm3/m64{sae} + EVEX.NDS.LIG.F2.0F.W1 5F /r + + AVX512F + + Return the maximum scalar double-precision floating-point value between xmm3/m64 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MAXSS--Return Maximum Scalar Single-Precision Floating-Point Value. + + MAXSS + xmm1,xmm2/m32 + F3 0F 5F /r + + SSE + + Return the maximum scalar single-precision floating-point value between xmm2/m32 and xmm1. + + + VMAXSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 5F /r + + AVX + + Return the maximum scalar single-precision floating-point value between xmm3/m32 and xmm2. + + + VMAXSS + xmm1 {k1}{z},xmm2,xmm3/m32{sae} + EVEX.NDS.LIG.F3.0F.W0 5F /r + + AVX512F + + Return the maximum scalar single-precision floating-point value between xmm3/m32 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MINPD--Minimum of Packed Double-Precision Floating-Point Values. + + MINPD + xmm1,xmm2/m128 + 66 0F 5D /r + + SSE2 + + Return the minimum double-precision floating-point values between xmm1 and xmm2/mem. + + + VMINPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5D /r + + AVX + + Return the minimum double-precision floating-point values between xmm2 and xmm3/mem. + + + VMINPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5D /r + + AVX + + Return the minimum packed double-precision floating-point values between ymm2 and ymm3/mem. + + + VMINPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 5D /r + + AVX512VL + AVX512F + + Return the minimum packed double-precision floating-point values between xmm2 and xmm3/m128/m64bcst and store result in xmm1 subject to writemask k1. + + + VMINPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 5D /r + + AVX512VL + AVX512F + + Return the minimum packed double-precision floating-point values between ymm2 and ymm3/m256/m64bcst and store result in ymm1 subject to writemask k1. + + + VMINPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae} + EVEX.NDS.512.66.0F.W1 5D /r + + AVX512F + + Return the minimum packed double-precision floating-point values between zmm2 and zmm3/m512/m64bcst and store result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MINPS--Minimum of Packed Single-Precision Floating-Point Values. + + MINPS + xmm1,xmm2/m128 + 0F 5D /r + + SSE + + Return the minimum single-precision floating-point values between xmm1 and xmm2/mem. 
+ + + VMINPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5D /r + + AVX + + Return the minimum single-precision floating-point values between xmm2 and xmm3/mem. + + + VMINPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5D /r + + AVX + + Return the minimum single double-precision floating-point values between ymm2 and ymm3/mem. + + + VMINPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 5D /r + + AVX512VL + AVX512F + + Return the minimum packed single-precision floating-point values between xmm2 and xmm3/m128/m32bcst and store result in xmm1 subject to writemask k1. + + + VMINPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 5D /r + + AVX512VL + AVX512F + + Return the minimum packed single-precision floating-point values between ymm2 and ymm3/m256/m32bcst and store result in ymm1 subject to writemask k1. + + + VMINPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae} + EVEX.NDS.512.0F.W0 5D /r + + AVX512F + + Return the minimum packed single-precision floating-point values between zmm2 and zmm3/m512/m32bcst and store result in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MINSD--Return Minimum Scalar Double-Precision Floating-Point Value. + + MINSD + xmm1,xmm2/m64 + F2 0F 5D /r + + SSE2 + + Return the minimum scalar double-precision floatingpoint value between xmm2/m64 and xmm1. + + + VMINSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 5D /r + + AVX + + Return the minimum scalar double-precision floatingpoint value between xmm3/m64 and xmm2. + + + VMINSD + xmm1 {k1}{z},xmm2,xmm3/m64{sae} + EVEX.NDS.LIG.F2.0F.W1 5D /r + + AVX512F + + Return the minimum scalar double-precision floatingpoint value between xmm3/m64 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MINSS--Return Minimum Scalar Single-Precision Floating-Point Value. + + MINSS + xmm1,xmm2/m32 + F3 0F 5D /r + + SSE + + Return the minimum scalar single-precision floatingpoint value between xmm2/m32 and xmm1. + + + VMINSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 5D /r + + AVX + + Return the minimum scalar single-precision floatingpoint value between xmm3/m32 and xmm2. + + + VMINSS + xmm1 {k1}{z},xmm2,xmm3/m32{sae} + EVEX.NDS.LIG.F3.0F.W0 5D /r + + AVX512F + + Return the minimum scalar single-precision floatingpoint value between xmm3/m32 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MOVAPD--Move Aligned Packed Double-Precision Floating-Point Values. + + MOVAPD + xmm1,xmm2/m128 + 66 0F 28 /r + + SSE2 + + Move aligned packed double-precision floatingpoint values from xmm2/mem to xmm1. + + + MOVAPD + xmm2/m128,xmm1 + 66 0F 29 /r + + SSE2 + + Move aligned packed double-precision floatingpoint values from xmm1 to xmm2/mem. + + + VMOVAPD + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 28 /r + + AVX + + Move aligned packed double-precision floatingpoint values from xmm2/mem to xmm1. + + + VMOVAPD + xmm2/m128,xmm1 + VEX.128.66.0F.WIG 29 /r + + AVX + + Move aligned packed double-precision floatingpoint values from xmm1 to xmm2/mem. + + + VMOVAPD + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 28 /r + + AVX + + Move aligned packed double-precision floatingpoint values from ymm2/mem to ymm1. 
+ + + VMOVAPD + ymm2/m256,ymm1 + VEX.256.66.0F.WIG 29 /r + + AVX + + Move aligned packed double-precision floatingpoint values from ymm1 to ymm2/mem. + + + VMOVAPD + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F.W1 28 /r + + AVX512VL + AVX512F + + Move aligned packed double-precision floatingpoint values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVAPD + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F.W1 28 /r + + AVX512VL + AVX512F + + Move aligned packed double-precision floatingpoint values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVAPD + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F.W1 28 /r + + AVX512F + + Move aligned packed double-precision floatingpoint values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVAPD + xmm2/m128 {k1}{z},xmm1 + EVEX.128.66.0F.W1 29 /r + + AVX512VL + AVX512F + + Move aligned packed double-precision floatingpoint values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVAPD + ymm2/m256 {k1}{z},ymm1 + EVEX.256.66.0F.W1 29 /r + + AVX512VL + AVX512F + + Move aligned packed double-precision floatingpoint values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVAPD + zmm2/m512 {k1}{z},zmm1 + EVEX.512.66.0F.W1 29 /r + + AVX512F + + Move aligned packed double-precision floatingpoint values from zmm1 to zmm2/m512 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVAPS--Move Aligned Packed Single-Precision Floating-Point Values. + + MOVAPS + xmm1,xmm2/m128 + 0F 28 /r + + SSE + + Move aligned packed single-precision floating-point values from xmm2/mem to xmm1. + + + MOVAPS + xmm2/m128,xmm1 + 0F 29 /r + + SSE + + Move aligned packed single-precision floating-point values from xmm1 to xmm2/mem. + + + VMOVAPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 28 /r + + AVX + + Move aligned packed single-precision floating-point values from xmm2/mem to xmm1. + + + VMOVAPS + xmm2/m128,xmm1 + VEX.128.0F.WIG 29 /r + + AVX + + Move aligned packed single-precision floating-point values from xmm1 to xmm2/mem. + + + VMOVAPS + ymm1,ymm2/m256 + VEX.256.0F.WIG 28 /r + + AVX + + Move aligned packed single-precision floating-point values from ymm2/mem to ymm1. + + + VMOVAPS + ymm2/m256,ymm1 + VEX.256.0F.WIG 29 /r + + AVX + + Move aligned packed single-precision floating-point values from ymm1 to ymm2/mem. + + + VMOVAPS + xmm1 {k1}{z},xmm2/m128 + EVEX.128.0F.W0 28 /r + + AVX512VL + AVX512F + + Move aligned packed single-precision floating-point values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVAPS + ymm1 {k1}{z},ymm2/m256 + EVEX.256.0F.W0 28 /r + + AVX512VL + AVX512F + + Move aligned packed single-precision floating-point values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVAPS + zmm1 {k1}{z},zmm2/m512 + EVEX.512.0F.W0 28 /r + + AVX512F + + Move aligned packed single-precision floating-point values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVAPS + xmm2/m128 {k1}{z},xmm1 + EVEX.128.0F.W0 29 /r + + AVX512VL + AVX512F + + Move aligned packed single-precision floating-point values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVAPS + ymm2/m256 {k 1}{z},ymm1 + EVEX.256.0F.W0 29 /r + + AVX512VL + AVX512F + + Move aligned packed single-precision floating-point values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVAPS + zmm2/m512 {k1}{z},zmm1 + EVEX.512.0F.W0 29 /r + + AVX512F + + Move aligned packed single-precision floating-point values from zmm1 to zmm2/m512 using writemask k1. 
+ + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVD/MOVQ--Move Doubleword and Quadword. + + MOVD + xmm1,r32/m32 + 66 0F 6E /r + + SSE2 + + Move doubleword from r/m32 to xmm1. + + + MOVQ + xmm1,r64/m64 + 66 REX.W 0F 6E /r + + SSE2 + + Move quadword from r/m64 to xmm1. + + + VMOVD + xmm1,r32/m32 + VEX.128.66.0F.W0 6E /r + + AVX + + Move doubleword from r/m32 to xmm1. + + + VMOVQ + xmm1,r64/m64 + VEX.128.66.0F.W1 6E /r + + AVX + + Move quadword from r/m64 to xmm1. + + + VMOVD + xmm1,r32/m32 + EVEX.128.66.0F.W0 6E /r + + AVX512F + + Move doubleword from r/m32 to xmm1. + + + VMOVQ + xmm1,r64/m64 + EVEX.128.66.0F.W1 6E /r + + AVX512F + + Move quadword from r/m64 to xmm1. + + + MOVD + r32/m32,xmm1 + 66 0F 7E /r + + SSE2 + + Move doubleword from xmm1 register to r/m32. + + + MOVQ + r64/m64,xmm1 + 66 REX.W 0F 7E /r + + SSE2 + + Move quadword from xmm1 register to r/m64. + + + VMOVD + r32/m32,xmm1 + VEX.128.66.0F.W0 7E /r + + AVX + + Move doubleword from xmm1 register to r/m32. + + + VMOVQ + r64/m64,xmm1 + VEX.128.66.0F.W1 7E /r + + AVX + + Move quadword from xmm1 register to r/m64. + + + VMOVD + r32/m32,xmm1 + EVEX.128.66.0F.W0 7E /r + + AVX512F + + Move doubleword from xmm1 register to r/m32. + + + VMOVQ + r64/m64,xmm1 + EVEX.128.66.0F.W1 7E /r + + AVX512F + + Move quadword from xmm1 register to r/m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVQ--Move Quadword. + + MOVQ + xmm1,xmm2/m64 + F3 0F 7E /r + + SSE2 + + Move quadword from xmm2/m64 to xmm1. + + + VMOVQ + xmm1,xmm2/m64 + VEX.128.F3.0F.WIG 7E /r + + AVX + + Move quadword from xmm2/m64 to xmm1. + + + VMOVQ + xmm1,xmm2/m64 + EVEX.128.F3.0F.W1 7E /r + + AVX512F + + Move quadword from xmm2/m64 to xmm1. + + + MOVQ + xmm1/m64,xmm2 + 66 0F D6 /r + + SSE2 + + Move quadword from xmm2 register to xmm1/m64. + + + VMOVQ + xmm1/m64,xmm2 + VEX.128.66.0F.WIG D6 /r + + AVX + + Move quadword from xmm2 register to xmm1/m64. + + + VMOVQ + xmm1/m64,xmm2 + EVEX.128.66.0F.W1 D6 /r + + AVX512F + + Move quadword from xmm2 register to xmm1/m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVDDUP--Replicate Double FP Values. + + MOVDDUP + xmm1,xmm2/m64 + F2 0F 12 /r + + SSE3 + + Move double-precision floating-point value from xmm2/m64 and duplicate into xmm1. + + + VMOVDDUP + xmm1,xmm2/m64 + VEX.128.F2.0F.WIG 12 /r + + AVX + + Move double-precision floating-point value from xmm2/m64 and duplicate into xmm1. + + + VMOVDDUP + ymm1,ymm2/m256 + VEX.256.F2.0F.WIG 12 /r + + AVX + + Move even index double-precision floating-point values from ymm2/mem and duplicate each element into ymm1. + + + VMOVDDUP + xmm1 {k1}{z},xmm2/m64 + EVEX.128.F2.0F.W1 12 /r + + AVX512VL + AVX512F + + Move double-precision floating-point value from xmm2/m64 and duplicate each element into xmm1 subject to writemask k1. + + + VMOVDDUP + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F2.0F.W1 12 /r + + AVX512VL + AVX512F + + Move even index double-precision floating-point values from ymm2/m256 and duplicate each element into ymm1 subject to writemask k1. 
+ + + VMOVDDUP + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F2.0F.W1 12 /r + + AVX512F + + Move even index double-precision floating-point values from zmm2/m512 and duplicate each element into zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVDQA,VMOVDQA32/64--Move Aligned Packed Integer Values. + + MOVDQA + xmm1,xmm2/m128 + 66 0F 6F /r + + SSE2 + + Move aligned packed integer values from xmm2/mem to xmm1. + + + MOVDQA + xmm2/m128,xmm1 + 66 0F 7F /r + + SSE2 + + Move aligned packed integer values from xmm1 to xmm2/mem. + + + VMOVDQA + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 6F /r + + AVX + + Move aligned packed integer values from xmm2/mem to xmm1. + + + VMOVDQA + xmm2/m128,xmm1 + VEX.128.66.0F.WIG 7F /r + + AVX + + Move aligned packed integer values from xmm1 to xmm2/mem. + + + VMOVDQA + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 6F /r + + AVX + + Move aligned packed integer values from ymm2/mem to ymm1. + + + VMOVDQA + ymm2/m256,ymm1 + VEX.256.66.0F.WIG 7F /r + + AVX + + Move aligned packed integer values from ymm1 to ymm2/mem. + + + VMOVDQA32 + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F.W0 6F /r + + AVX512VL + AVX512F + + Move aligned packed doubleword integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQA32 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F.W0 6F /r + + AVX512VL + AVX512F + + Move aligned packed doubleword integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQA32 + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F.W0 6F /r + + AVX512F + + Move aligned packed doubleword integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQA32 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.66.0F.W0 7F /r + + AVX512VL + AVX512F + + Move aligned packed doubleword integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQA32 + ymm2/m256 {k1}{z},ymm1 + EVEX.256.66.0F.W0 7F /r + + AVX512VL + AVX512F + + Move aligned packed doubleword integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQA32 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.66.0F.W0 7F /r + + AVX512F + + Move aligned packed doubleword integer values from zmm1 to zmm2/m512 using writemask k1. + + + VMOVDQA64 + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F.W1 6F /r + + AVX512VL + AVX512F + + Move aligned quadword integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQA64 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F.W1 6F /r + + AVX512VL + AVX512F + + Move aligned quadword integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQA64 + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F.W1 6F /r + + AVX512F + + Move aligned packed quadword integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQA64 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.66.0F.W1 7F /r + + AVX512VL + AVX512F + + Move aligned packed quadword integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQA64 + ymm2/m256 {k1}{z},ymm1 + EVEX.256.66.0F.W1 7F /r + + AVX512VL + AVX512F + + Move aligned packed quadword integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQA64 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.66.0F.W1 7F /r + + AVX512F + + Move aligned packed quadword integer values from zmm1 to zmm2/m512 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVDQU,VMOVDQU8/16/32/64--Move Unaligned Packed Integer Values. 
+ + MOVDQU + xmm1,xmm2/m128 + F3 0F 6F /r + + SSE2 + + Move unaligned packed integer values from xmm2/m128 to xmm1. + + + MOVDQU + xmm2/m128,xmm1 + F3 0F 7F /r + + SSE2 + + Move unaligned packed integer values from xmm1 to xmm2/m128. + + + VMOVDQU + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 6F /r + + AVX + + Move unaligned packed integer values from xmm2/m128 to xmm1. + + + VMOVDQU + xmm2/m128,xmm1 + VEX.128.F3.0F.WIG 7F /r + + AVX + + Move unaligned packed integer values from xmm1 to xmm2/m128. + + + VMOVDQU + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 6F /r + + AVX + + Move unaligned packed integer values from ymm2/m256 to ymm1. + + + VMOVDQU + ymm2/m256,ymm1 + VEX.256.F3.0F.WIG 7F /r + + AVX + + Move unaligned packed integer values from ymm1 to ymm2/m256. + + + VMOVDQU8 + xmm1 {k1}{z},xmm2/m128 + EVEX.128.F2.0F.W0 6F /r + + AVX512VL + AVX512BW + + Move unaligned packed byte integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQU8 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F2.0F.W0 6F /r + + AVX512VL + AVX512BW + + Move unaligned packed byte integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQU8 + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F2.0F.W0 6F /r + + AVX512BW + + Move unaligned packed byte integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQU8 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.F2.0F.W0 7F /r + + AVX512VL + AVX512BW + + Move unaligned packed byte integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQU8 + ymm2/m256 {k 1}{z},ymm1 + EVEX.256.F2.0F.W0 7F /r + + AVX512VL + AVX512BW + + Move unaligned packed byte integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQU8 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.F2.0F.W0 7F /r + + AVX512BW + + Move unaligned packed byte integer values from zmm1 to zmm2/m512 using writemask k1. + + + VMOVDQU16 + xmm1 {k1}{z},xmm2/m128 + EVEX.128.F2.0F.W1 6F /r + + AVX512VL + AVX512BW + + Move unaligned packed word integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQU16 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F2.0F.W1 6F /r + + AVX512VL + AVX512BW + + Move unaligned packed word integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQU16 + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F2.0F.W1 6F /r + + AVX512BW + + Move unaligned packed word integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQU16 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.F2.0F.W1 7F /r + + AVX512VL + AVX512BW + + Move unaligned packed word integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQU16 + ymm2/m256 {k1}{z},ymm1 + EVEX.256.F2.0F.W1 7F /r + + AVX512VL + AVX512BW + + Move unaligned packed word integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQU16 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.F2.0F.W1 7F /r + + AVX512BW + + Move unaligned packed word integer values from zmm1 to zmm2/m512 using writemask k1. + + + VMOVDQU32 + xmm1 {k1}{z},xmm2/mm128 + EVEX.128.F3.0F.W0 6F /r + + AVX512VL + AVX512F + + Move unaligned packed doubleword integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQU32 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F3.0F.W0 6F /r + + AVX512VL + AVX512F + + Move unaligned packed doubleword integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQU32 + zmm1 {k 1}{z},zmm2/m512 + EVEX.512.F3.0F.W0 6F /r + + AVX512F + + Move unaligned packed doubleword integer values from zmm2/m512 to zmm1 using writemask k1. 
+ + + VMOVDQU32 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.F3.0F.W0 7F /r + + AVX512VL + AVX512F + + Move unaligned packed doubleword integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQU32 + ymm2/m256 {k1}{z},ymm1 + EVEX.256.F3.0F.W0 7F /r + + AVX512VL + AVX512F + + Move unaligned packed doubleword integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQU32 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.F3.0F.W0 7F /r + + AVX512F + + Move unaligned packed doubleword integer values from zmm1 to zmm2/m512 using writemask k1. + + + VMOVDQU64 + xmm1 {k1}{z},xmm2/m128 + EVEX.128.F3.0F.W1 6F /r + + AVX512VL + AVX512F + + Move unaligned packed quadword integer values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVDQU64 + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F3.0F.W1 6F /r + + AVX512VL + AVX512F + + Move unaligned packed quadword integer values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVDQU64 + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F3.0F.W1 6F /r + + AVX512F + + Move unaligned packed quadword integer values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVDQU64 + xmm2/m128 {k1}{z},xmm1 + EVEX.128.F3.0F.W1 7F /r + + AVX512VL + AVX512F + + Move unaligned packed quadword integer values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVDQU64 + ymm2/m256 {k1}{z},ymm1 + EVEX.256.F3.0F.W1 7F /r + + AVX512VL + AVX512F + + Move unaligned packed quadword integer values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVDQU64 + zmm2/m512 {k1}{z},zmm1 + EVEX.512.F3.0F.W1 7F /r + + AVX512F + + Move unaligned packed quadword integer values from zmm1 to zmm2/m512 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVHLPS--Move Packed Single-Precision Floating-Point Values High to Low. + + MOVHLPS + xmm1,xmm2 + 0F 12 /r + + SSE + + Move two packed single-precision floating-point values from high quadword of xmm2 to low quadword of xmm1. + + + VMOVHLPS + xmm1,xmm2,xmm3 + VEX.NDS.128.0F.WIG 12 /r + + AVX + + Merge two packed single-precision floating-point values from high quadword of xmm3 and low quadword of xmm2. + + + VMOVHLPS + xmm1,xmm2,xmm3 + EVEX.NDS.128.0F.W0 12 /r + + AVX512F + + Merge two packed single-precision floating-point values from high quadword of xmm3 and low quadword of xmm2. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + vvvv(r) + ModRM:r/m(r) + NA + + + + MOVHPD--Move High Packed Double-Precision Floating-Point Value. + + MOVHPD + xmm1,m64 + 66 0F 16 /r + + SSE2 + + Move double-precision floating-point value from m64 to high quadword of xmm1. + + + VMOVHPD + xmm2,xmm1,m64 + VEX.NDS.128.66.0F.WIG 16 /r + + AVX + + Merge double-precision floating-point value from m64 and the low quadword of xmm1. + + + VMOVHPD + xmm2,xmm1,m64 + EVEX.NDS.128.66.0F.W1 16 /r + + AVX512F + + Merge double-precision floating-point value from m64 and the low quadword of xmm1. + + + MOVHPD + m64,xmm1 + 66 0F 17 /r + + SSE2 + + Move double-precision floating-point value from high quadword of xmm1 to m64. + + + VMOVHPD + m64,xmm1 + VEX.128.66.0F.WIG 17 /r + + AVX + + Move double-precision floating-point value from high quadword of xmm1 to m64. + + + VMOVHPD + m64,xmm1 + EVEX.128.66.0F.W1 17 /r + + AVX512F + + Move double-precision floating-point value from high quadword of xmm1 to m64. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVHPS--Move High Packed Single-Precision Floating-Point Values. + + MOVHPS + xmm1,m64 + 0F 16 /r + + SSE + + Move two packed single-precision floating-point values from m64 to high quadword of xmm1. + + + VMOVHPS + xmm2,xmm1,m64 + VEX.NDS.128.0F.WIG 16 /r + + AVX + + Merge two packed single-precision floating-point values from m64 and the low quadword of xmm1. + + + VMOVHPS + xmm2,xmm1,m64 + EVEX.NDS.128.0F.W0 16 /r + + AVX512F + + Merge two packed single-precision floating-point values from m64 and the low quadword of xmm1. + + + MOVHPS + m64,xmm1 + 0F 17 /r + + SSE + + Move two packed single-precision floating-point values from high quadword of xmm1 to m64. + + + VMOVHPS + m64,xmm1 + VEX.128.0F.WIG 17 /r + + AVX + + Move two packed single-precision floating-point values from high quadword of xmm1 to m64. + + + VMOVHPS + m64,xmm1 + EVEX.128.0F.W0 17 /r + + AVX512F + + Move two packed single-precision floating-point values from high quadword of xmm1 to m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVLHPS--Move Packed Single-Precision Floating-Point Values Low to High. + + MOVLHPS + xmm1,xmm2 + 0F 16 /r + + SSE + + Move two packed single-precision floating-point values from low quadword of xmm2 to high quadword of xmm1. + + + VMOVLHPS + xmm1,xmm2,xmm3 + VEX.NDS.128.0F.WIG 16 /r + + AVX + + Merge two packed single-precision floating-point values from low quadword of xmm3 and low quadword of xmm2. + + + VMOVLHPS + xmm1,xmm2,xmm3 + EVEX.NDS.128.0F.W0 16 /r + + AVX512F + + Merge two packed single-precision floating-point values from low quadword of xmm3 and low quadword of xmm2. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + vvvv(r) + ModRM:r/m(r) + NA + + + + MOVLPD--Move Low Packed Double-Precision Floating-Point Value. + + MOVLPD + xmm1,m64 + 66 0F 12 /r + + SSE2 + + Move double-precision floating-point value from m64 to low quadword of xmm1. + + + VMOVLPD + xmm2,xmm1,m64 + VEX.NDS.128.66.0F.WIG 12 /r + + AVX + + Merge double-precision floating-point value from m64 and the high quadword of xmm1. + + + VMOVLPD + xmm2,xmm1,m64 + EVEX.NDS.128.66.0F.W1 12 /r + + AVX512F + + Merge double-precision floating-point value from m64 and the high quadword of xmm1. + + + MOVLPD + m64,xmm1 + 66 0F 13/r + + SSE2 + + Move double-precision floating-point value from low quadword of xmm1 to m64. + + + VMOVLPD + m64,xmm1 + VEX.128.66.0F.WIG 13/r + + AVX + + Move double-precision floating-point value from low quadword of xmm1 to m64. + + + VMOVLPD + m64,xmm1 + EVEX.128.66.0F.W1 13/r + + AVX512F + + Move double-precision floating-point value from low quadword of xmm1 to m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(r) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVLPS--Move Low Packed Single-Precision Floating-Point Values. + + MOVLPS + xmm1,m64 + 0F 12 /r + + SSE + + Move two packed single-precision floating-point values from m64 to low quadword of xmm1. 
+ + + VMOVLPS + xmm2,xmm1,m64 + VEX.NDS.128.0F.WIG 12 /r + + AVX + + Merge two packed single-precision floating-point values from m64 and the high quadword of xmm1. + + + VMOVLPS + xmm2,xmm1,m64 + EVEX.NDS.128.0F.W0 12 /r + + AVX512F + + Merge two packed single-precision floating-point values from m64 and the high quadword of xmm1. + + + MOVLPS + m64,xmm1 + 0F 13/r + + SSE + + Move two packed single-precision floating-point values from low quadword of xmm1 to m64. + + + VMOVLPS + m64,xmm1 + VEX.128.0F.WIG 13/r + + AVX + + Move two packed single-precision floating-point values from low quadword of xmm1 to m64. + + + VMOVLPS + m64,xmm1 + EVEX.128.0F.W0 13/r + + AVX512F + + Move two packed single-precision floating-point values from low quadword of xmm1 to m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVNTDQA--Load Double Quadword Non-Temporal Aligned Hint. + + MOVNTDQA + xmm1,m128 + 66 0F 38 2A /r + + SSE4_1 + + Move double quadword from m128 to xmm1 using nontemporal hint if WC memory type. + + + VMOVNTDQA + xmm1,m128 + VEX.128.66.0F38.WIG 2A /r + + AVX + + Move double quadword from m128 to xmm using nontemporal hint if WC memory type. + + + VMOVNTDQA + ymm1,m256 + VEX.256.66.0F38.WIG 2A /r + + AVX2 + + Move 256-bit data from m256 to ymm using non-temporal hint if WC memory type. + + + VMOVNTDQA + xmm1,m128 + EVEX.128.66.0F38.W0 2A /r + + AVX512VL + AVX512F + + Move 128-bit data from m128 to xmm using non-temporal hint if WC memory type. + + + VMOVNTDQA + ymm1,m256 + EVEX.256.66.0F38.W0 2A /r + + AVX512VL + AVX512F + + Move 256-bit data from m256 to ymm using non-temporal hint if WC memory type. + + + VMOVNTDQA + zmm1,m512 + EVEX.512.66.0F38.W0 2A /r + + AVX512F + + Move 512-bit data from m512 to zmm using non-temporal hint if WC memory type. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVNTDQ--Store Packed Integers Using Non-Temporal Hint. + + MOVNTDQ + m128,xmm1 + 66 0F E7 /r + + SSE2 + + Move packed integer values in xmm1 to m128 using nontemporal hint. + + + VMOVNTDQ + m128,xmm1 + VEX.128.66.0F.WIG E7 /r + + AVX + + Move packed integer values in xmm1 to m128 using nontemporal hint. + + + VMOVNTDQ + m256,ymm1 + VEX.256.66.0F.WIG E7 /r + + AVX + + Move packed integer values in ymm1 to m256 using nontemporal hint. + + + VMOVNTDQ + m128,xmm1 + EVEX.128.66.0F.W0 E7 /r + + AVX512VL + AVX512F + + Move packed integer values in xmm1 to m128 using nontemporal hint. + + + VMOVNTDQ + m256,ymm1 + EVEX.256.66.0F.W0 E7 /r + + AVX512VL + AVX512F + + Move packed integer values in zmm1 to m256 using nontemporal hint. + + + VMOVNTDQ + m512,zmm1 + EVEX.512.66.0F.W0 E7 /r + + AVX512F + + Move packed integer values in zmm1 to m512 using nontemporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVNTPD--Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint. + + MOVNTPD + m128,xmm1 + 66 0F 2B /r + + SSE2 + + Move packed double-precision values in xmm1 to m128 using non-temporal hint. + + + VMOVNTPD + m128,xmm1 + VEX.128.66.0F.WIG 2B /r + + AVX + + Move packed double-precision values in xmm1 to m128 using non-temporal hint. + + + VMOVNTPD + m256,ymm1 + VEX.256.66.0F.WIG 2B /r + + AVX + + Move packed double-precision values in ymm1 to m256 using non-temporal hint. 
+ + + VMOVNTPD + m128,xmm1 + EVEX.128.66.0F.W1 2B /r + + AVX512VL + AVX512F + + Move packed double-precision values in xmm1 to m128 using non-temporal hint. + + + VMOVNTPD + m256,ymm1 + EVEX.256.66.0F.W1 2B /r + + AVX512VL + AVX512F + + Move packed double-precision values in ymm1 to m256 using non-temporal hint. + + + VMOVNTPD + m512,zmm1 + EVEX.512.66.0F.W1 2B /r + + AVX512F + + Move packed double-precision values in zmm1 to m512 using non-temporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVNTPS--Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint. + + MOVNTPS + m128,xmm1 + 0F 2B /r + + SSE + + Move packed single-precision values xmm1 to mem using non-temporal hint. + + + VMOVNTPS + m128,xmm1 + VEX.128.0F.WIG 2B /r + + AVX + + Move packed single-precision values xmm1 to mem using non-temporal hint. + + + VMOVNTPS + m256,ymm1 + VEX.256.0F.WIG 2B /r + + AVX + + Move packed single-precision values ymm1 to mem using non-temporal hint. + + + VMOVNTPS + m128,xmm1 + EVEX.128.0F.W0 2B /r + + AVX512VL + AVX512F + + Move packed single-precision values in xmm1 to m128 using non-temporal hint. + + + VMOVNTPS + m256,ymm1 + EVEX.256.0F.W0 2B /r + + AVX512VL + AVX512F + + Move packed single-precision values in ymm1 to m256 using non-temporal hint. + + + VMOVNTPS + m512,zmm1 + EVEX.512.0F.W0 2B /r + + AVX512F + + Move packed single-precision values in zmm1 to m512 using non-temporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVSD--Move or Merge Scalar Double-Precision Floating-Point Value. + + MOVSD + xmm1,xmm2 + F2 0F 10 /r + + SSE2 + + Move scalar double-precision floating-point value from xmm2 to xmm1 register. + + + MOVSD + xmm1,m64 + F2 0F 10 /r + + SSE2 + + Load scalar double-precision floating-point value from m64 to xmm1 register. + + + MOVSD + xmm1/m64,xmm2 + F2 0F 11 /r + + SSE2 + + Move scalar double-precision floating-point value from xmm2 register to xmm1/m64. + + + VMOVSD + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F2.0F.WIG 10 /r + + AVX + + Merge scalar double-precision floating-point value from xmm2 and xmm3 to xmm1 register. + + + VMOVSD + xmm1,m64 + VEX.LIG.F2.0F.WIG 10 /r + + AVX + + Load scalar double-precision floating-point value from m64 to xmm1 register. + + + VMOVSD + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F2.0F.WIG 11 /r + + AVX + + Merge scalar double-precision floating-point value from xmm2 and xmm3 registers to xmm1. + + + VMOVSD + m64,xmm1 + VEX.LIG.F2.0F.WIG 11 /r + + AVX + + Store scalar double-precision floating-point value from xmm1 register to m64. + + + VMOVSD + xmm1 {k1}{z},xmm2,xmm3 + EVEX.NDS.LIG.F2.0F.W1 10 /r + + AVX512F + + Merge scalar double-precision floating-point value from xmm2 and xmm3 registers to xmm1 under writemask k1. + + + VMOVSD + xmm1 {k1}{z},m64 + EVEX.LIG.F2.0F.W1 10 /r + + AVX512F + + Load scalar double-precision floating-point value from m64 to xmm1 register under writemask k1. + + + VMOVSD + xmm1 {k1}{z},xmm2,xmm3 + EVEX.NDS.LIG.F2.0F.W1 11 /r + + AVX512F + + Merge scalar double-precision floating-point value from xmm2 and xmm3 registers to xmm1 under writemask k1. + + + VMOVSD + m64 {k1},xmm1 + EVEX.LIG.F2.0F.W1 11 /r + + AVX512F + + Store scalar double-precision floating-point value from xmm1 register to m64 under writemask k1. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + vvvv(r) + ModRM:reg(r) + NA + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVSHDUP--Replicate Single FP Values. + + MOVSHDUP + xmm1,xmm2/m128 + F3 0F 16 /r + + SSE3 + + Move odd index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1. + + + VMOVSHDUP + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 16 /r + + AVX + + Move odd index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1. + + + VMOVSHDUP + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 16 /r + + AVX + + Move odd index single-precision floating-point values from ymm2/mem and duplicate each element into ymm1. + + + VMOVSHDUP + xmm1 {k1}{z},xmm2/m128 + EVEX.128.F3.0F.W0 16 /r + + AVX512VL + AVX512F + + Move odd index single-precision floating-point values from xmm2/m128 and duplicate each element into xmm1 under writemask. + + + VMOVSHDUP + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F3.0F.W0 16 /r + + AVX512VL + AVX512F + + Move odd index single-precision floating-point values from ymm2/m256 and duplicate each element into ymm1 under writemask. + + + VMOVSHDUP + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F3.0F.W0 16 /r + + AVX512F + + Move odd index single-precision floating-point values from zmm2/m512 and duplicate each element into zmm1 under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVSLDUP--Replicate Single FP Values. + + MOVSLDUP + xmm1,xmm2/m128 + F3 0F 12 /r + + SSE3 + + Move even index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1. + + + VMOVSLDUP + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 12 /r + + AVX + + Move even index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1. + + + VMOVSLDUP + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 12 /r + + AVX + + Move even index single-precision floating-point values from ymm2/mem and duplicate each element into ymm1. + + + VMOVSLDUP + xmm1 {k1}{z},xmm2/m128 + EVEX.128.F3.0F.W0 12 /r + + AVX512VL + AVX512F + + Move even index single-precision floating-point values from xmm2/m128 and duplicate each element into xmm1 under writemask. + + + VMOVSLDUP + ymm1 {k1}{z},ymm2/m256 + EVEX.256.F3.0F.W0 12 /r + + AVX512VL + AVX512F + + Move even index single-precision floating-point values from ymm2/m256 and duplicate each element into ymm1 under writemask. + + + VMOVSLDUP + zmm1 {k1}{z},zmm2/m512 + EVEX.512.F3.0F.W0 12 /r + + AVX512F + + Move even index single-precision floating-point values from zmm2/m512 and duplicate each element into zmm1 under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVSS--Move or Merge Scalar Single-Precision Floating-Point Value. + + MOVSS + xmm1,xmm2 + F3 0F 10 /r + + SSE + + Merge scalar single-precision floating-point value from xmm2 to xmm1 register. + + + MOVSS + xmm1,m32 + F3 0F 10 /r + + SSE + + Load scalar single-precision floating-point value from m32 to xmm1 register. + + + VMOVSS + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F3.0F.WIG 10 /r + + AVX + + Merge scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register. + + + VMOVSS + xmm1,m32 + VEX.LIG.F3.0F.WIG 10 /r + + AVX + + Load scalar single-precision floating-point value from m32 to xmm1 register. 
+ + + MOVSS + xmm2/m32,xmm1 + F3 0F 11 /r + + SSE + + Move scalar single-precision floating-point value from xmm1 register to xmm2/m32. + + + VMOVSS + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F3.0F.WIG 11 /r + + AVX + + Move scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register. + + + VMOVSS + m32,xmm1 + VEX.LIG.F3.0F.WIG 11 /r + + AVX + + Move scalar single-precision floating-point value from xmm1 register to m32. + + + VMOVSS + xmm1 {k1}{z},xmm2,xmm3 + EVEX.NDS.LIG.F3.0F.W0 10 /r + + AVX512F + + Move scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register under writemask k1. + + + VMOVSS + xmm1 {k1}{z},m32 + EVEX.LIG.F3.0F.W0 10 /r + + AVX512F + + Move scalar single-precision floating-point values from m32 to xmm1 under writemask k1. + + + VMOVSS + xmm1 {k1}{z},xmm2,xmm3 + EVEX.NDS.LIG.F3.0F.W0 11 /r + + AVX512F + + Move scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register under writemask k1. + + + VMOVSS + m32 {k1},xmm1 + EVEX.LIG.F3.0F.W0 11 /r + + AVX512F + + Move scalar single-precision floating-point values from xmm1 to m32 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + vvvv(r) + ModRM:reg(r) + NA + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVUPD--Move Unaligned Packed Double-Precision Floating-Point Values. + + MOVUPD + xmm1,xmm2/m128 + 66 0F 10 /r + + SSE2 + + Move unaligned packed double-precision floatingpoint from xmm2/mem to xmm1. + + + MOVUPD + xmm2/m128,xmm1 + 66 0F 11 /r + + SSE2 + + Move unaligned packed double-precision floatingpoint from xmm1 to xmm2/mem. + + + VMOVUPD + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 10 /r + + AVX + + Move unaligned packed double-precision floatingpoint from xmm2/mem to xmm1. + + + VMOVUPD + xmm2/m128,xmm1 + VEX.128.66.0F.WIG 11 /r + + AVX + + Move unaligned packed double-precision floatingpoint from xmm1 to xmm2/mem. + + + VMOVUPD + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 10 /r + + AVX + + Move unaligned packed double-precision floatingpoint from ymm2/mem to ymm1. + + + VMOVUPD + ymm2/m256,ymm1 + VEX.256.66.0F.WIG 11 /r + + AVX + + Move unaligned packed double-precision floatingpoint from ymm1 to ymm2/mem. + + + VMOVUPD + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F.W1 10 /r + + AVX512VL + AVX512F + + Move unaligned packed double-precision floatingpoint from xmm2/m128 to xmm1 using writemask k1. + + + VMOVUPD + xmm2/m128 {k1}{z},xmm1 + EVEX.128.66.0F.W1 11 /r + + AVX512VL + AVX512F + + Move unaligned packed double-precision floatingpoint from xmm1 to xmm2/m128 using writemask k1. + + + VMOVUPD + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F.W1 10 /r + + AVX512VL + AVX512F + + Move unaligned packed double-precision floatingpoint from ymm2/m256 to ymm1 using writemask k1. + + + VMOVUPD + ymm2/m256 {k1}{z},ymm1 + EVEX.256.66.0F.W1 11 /r + + AVX512VL + AVX512F + + Move unaligned packed double-precision floatingpoint from ymm1 to ymm2/m256 using writemask k1. + + + VMOVUPD + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F.W1 10 /r + + AVX512F + + Move unaligned packed double-precision floatingpoint values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVUPD + zmm2/m512 {k1}{z},zmm1 + EVEX.512.66.0F.W1 11 /r + + AVX512F + + Move unaligned packed double-precision floatingpoint values from zmm1 to zmm2/m512 using writemask k1. 
+ + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVUPS--Move Unaligned Packed Single-Precision Floating-Point Values. + + MOVUPS + xmm1,xmm2/m128 + 0F 10 /r + + SSE + + Move unaligned packed single-precision floating-point from xmm2/mem to xmm1. + + + MOVUPS + xmm2/m128,xmm1 + 0F 11 /r + + SSE + + Move unaligned packed single-precision floating-point from xmm1 to xmm2/mem. + + + VMOVUPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 10 /r + + AVX + + Move unaligned packed single-precision floating-point from xmm2/mem to xmm1. + + + VMOVUPS + xmm2/m128,xmm1 + VEX.128.0F 11.WIG /r + + AVX + + Move unaligned packed single-precision floating-point from xmm1 to xmm2/mem. + + + VMOVUPS + ymm1,ymm2/m256 + VEX.256.0F 10.WIG /r + + AVX + + Move unaligned packed single-precision floating-point from ymm2/mem to ymm1. + + + VMOVUPS + ymm2/m256,ymm1 + VEX.256.0F 11.WIG /r + + AVX + + Move unaligned packed single-precision floating-point from ymm1 to ymm2/mem. + + + VMOVUPS + xmm1 {k1}{z},xmm2/m128 + EVEX.128.0F.W0 10 /r + + AVX512VL + AVX512F + + Move unaligned packed single-precision floating-point values from xmm2/m128 to xmm1 using writemask k1. + + + VMOVUPS + ymm1 {k1}{z},ymm2/m256 + EVEX.256.0F.W0 10 /r + + AVX512VL + AVX512F + + Move unaligned packed single-precision floating-point values from ymm2/m256 to ymm1 using writemask k1. + + + VMOVUPS + zmm1 {k1}{z},zmm2/m512 + EVEX.512.0F.W0 10 /r + + AVX512F + + Move unaligned packed single-precision floating-point values from zmm2/m512 to zmm1 using writemask k1. + + + VMOVUPS + xmm2/m128 {k 1}{z},xmm1 + EVEX.128.0F.W0 11 /r + + AVX512VL + AVX512F + + Move unaligned packed single-precision floating-point values from xmm1 to xmm2/m128 using writemask k1. + + + VMOVUPS + ymm2/m256 {k1}{z},ymm1 + EVEX.256.0F.W0 11 /r + + AVX512VL + AVX512F + + Move unaligned packed single-precision floating-point values from ymm1 to ymm2/m256 using writemask k1. + + + VMOVUPS + zmm2/m512 {k1}{z},zmm1 + EVEX.512.0F.W0 11 /r + + AVX512F + + Move unaligned packed single-precision floating-point values from zmm1 to zmm2/m512 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + PSADBW--Compute Sum of Absolute Differences. + + PSADBW + xmm1,xmm2/m128 + 66 0F F6 /r + + SSE2 + + Computes the absolute differences of the packed unsigned byte integers from xmm2 /m128 and xmm1; the 8 low differences and 8 high differences are then summed separately to produce two unsigned word integer results. + + + VPSADBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F F6 /r + + AVX + + Computes the absolute differences of the packed unsigned byte integers from xmm3 /m128 and xmm2; the 8 low differences and 8 high differences are then summed separately to produce two unsigned word integer results. + + + VPSADBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F F6 /r + + AVX2 + + Computes the absolute differences of the packed unsigned byte integers from ymm3 /m256 and ymm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results. 
+ + + VPSADBW + xmm1,xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG F6 /r + + AVX512VL + AVX512BW + + Computes the absolute differences of the packed unsigned byte integers from xmm3 /m128 and xmm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results. + + + VPSADBW + ymm1,ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG F6 /r + + AVX512VL + AVX512BW + + Computes the absolute differences of the packed unsigned byte integers from ymm3 /m256 and ymm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results. + + + VPSADBW + zmm1,zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG F6 /r + + AVX512BW + + Computes the absolute differences of the packed unsigned byte integers from zmm3 /m512 and zmm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + MULPD--Multiply Packed Double-Precision Floating-Point Values. + + MULPD + xmm1,xmm2/m128 + 66 0F 59 /r + + SSE2 + + Multiply packed double-precision floating-point values in xmm2/m128 with xmm1 and store result in xmm1. + + + VMULPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 59 /r + + AVX + + Multiply packed double-precision floating-point values in xmm3/m128 with xmm2 and store result in xmm1. + + + VMULPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 59 /r + + AVX + + Multiply packed double-precision floating-point values in ymm3/m256 with ymm2 and store result in ymm1. + + + VMULPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 59 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from xmm3/m128/m64bcst to xmm2 and store result in xmm1. + + + VMULPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 59 /r + + AVX512VL + AVX512F + + Multiply packed double-precision floating-point values from ymm3/m256/m64bcst to ymm2 and store result in ymm1. + + + VMULPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F.W1 59 /r + + AVX512F + + Multiply packed double-precision floating-point values in zmm3/m512/m64bcst with zmm2 and store result in zmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MULPS--Multiply Packed Single-Precision Floating-Point Values. + + MULPS + xmm1,xmm2/m128 + 0F 59 /r + + SSE + + Multiply packed single-precision floating-point values in xmm2/m128 with xmm1 and store result in xmm1. + + + VMULPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 59 /r + + AVX + + Multiply packed single-precision floating-point values in xmm3/m128 with xmm2 and store result in xmm1. + + + VMULPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 59 /r + + AVX + + Multiply packed single-precision floating-point values in ymm3/m256 with ymm2 and store result in ymm1. + + + VMULPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 59 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from xmm3/m128/m32bcst to xmm2 and store result in xmm1. + + + VMULPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 59 /r + + AVX512VL + AVX512F + + Multiply packed single-precision floating-point values from ymm3/m256/m32bcst to ymm2 and store result in ymm1. 
+ + + VMULPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst {er} + EVEX.NDS.512.0F.W0 59 /r + + AVX512F + + Multiply packed single-precision floating-point values in zmm3/m512/m32bcst with zmm2 and store result in zmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MULSD--Multiply Scalar Double-Precision Floating-Point Value. + + MULSD + xmm1,xmm2/m64 + F2 0F 59 /r + + SSE2 + + Multiply the low double-precision floating-point value in xmm2/m64 by low double-precision floating-point value in xmm1. + + + VMULSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 59 /r + + AVX + + Multiply the low double-precision floating-point value in xmm3/m64 by low double-precision floating-point value in xmm2. + + + VMULSD + xmm1 {k1}{z},xmm2,xmm3/m64 {er} + EVEX.NDS.LIG.F2.0F.W1 59 /r + + AVX512F + + Multiply the low double-precision floating-point value in xmm3/m64 by low double-precision floating-point value in xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MULSS--Multiply Scalar Single-Precision Floating-Point Values. + + MULSS + xmm1,xmm2/m32 + F3 0F 59 /r + + SSE + + Multiply the low single-precision floating-point value in xmm2/m32 by the low single-precision floating-point value in xmm1. + + + VMULSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 59 /r + + AVX + + Multiply the low single-precision floating-point value in xmm3/m32 by the low single-precision floating-point value in xmm2. + + + VMULSS + xmm1 {k1}{z},xmm2,xmm3/m32 {er} + EVEX.NDS.LIG.F3.0F.W0 59 /r + + AVX512F + + Multiply the low single-precision floating-point value in xmm3/m32 by the low single-precision floating-point value in xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ORPD--Bitwise Logical OR of Packed Double Precision Floating-Point Values. + + ORPD + xmm1,xmm2/m128 + 66 0F 56/r + + SSE2 + + Return the bitwise logical OR of packed double-precision floating-point values in xmm1 and xmm2/mem. + + + VORPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 56 /r + + AVX + + Return the bitwise logical OR of packed double-precision floating-point values in xmm2 and xmm3/mem. + + + VORPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 56 /r + + AVX + + Return the bitwise logical OR of packed double-precision floating-point values in ymm2 and ymm3/mem. + + + VORPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 56 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical OR of packed double-precision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1. + + + VORPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 56 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical OR of packed double-precision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1. + + + VORPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 56 /r + + AVX512DQ + + Return the bitwise logical OR of packed double-precision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ORPS--Bitwise Logical OR of Packed Single Precision Floating-Point Values. 
+ + ORPS + xmm1,xmm2/m128 + 0F 56 /r + + SSE + + Return the bitwise logical OR of packed single-precision floating-point values in xmm1 and xmm2/mem. + + + VORPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F 56 /r + + AVX + + Return the bitwise logical OR of packed single-precision floating-point values in xmm2 and xmm3/mem. + + + VORPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F 56 /r + + AVX + + Return the bitwise logical OR of packed single-precision floating-point values in ymm2 and ymm3/mem. + + + VORPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 56 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical OR of packed single-precision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1. + + + VORPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 56 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical OR of packed single-precision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1. + + + VORPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 56 /r + + AVX512DQ + + Return the bitwise logical OR of packed single-precision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PABSB/PABSW/PABSD/PABSQ--Packed Absolute Value. + + PABSB + xmm1,xmm2/m128 + 66 0F 38 1C /r + + SSSE3 + + Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1. + + + PABSW + xmm1,xmm2/m128 + 66 0F 38 1D /r + + SSSE3 + + Compute the absolute value of 16-bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + PABSD + xmm1,xmm2/m128 + 66 0F 38 1E /r + + SSSE3 + + Compute the absolute value of 32-bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSB + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 1C /r + + AVX + + Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSW + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 1D /r + + AVX + + Compute the absolute value of 16-bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSD + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 1E /r + + AVX + + Compute the absolute value of 32-bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSB + ymm1,ymm2/m256 + VEX.256.66.0F38.WIG 1C /r + + AVX2 + + Compute the absolute value of bytes in ymm2/m256 and store UNSIGNED result in ymm1. + + + VPABSW + ymm1,ymm2/m256 + VEX.256.66.0F38.WIG 1D /r + + AVX2 + + Compute the absolute value of 16-bit integers in ymm2/m256 and store UNSIGNED result in ymm1. + + + VPABSD + ymm1,ymm2/m256 + VEX.256.66.0F38.WIG 1E /r + + AVX2 + + Compute the absolute value of 32-bit integers in ymm2/m256 and store UNSIGNED result in ymm1. + + + VPABSB + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F38.WIG 1C /r + + AVX512VL + AVX512BW + + Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1 using writemask k1. + + + VPABSB + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F38.WIG 1C /r + + AVX512VL + AVX512BW + + Compute the absolute value of bytes in ymm2/m256 and store UNSIGNED result in ymm1 using writemask k1. + + + VPABSB + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F38.WIG 1C /r + + AVX512BW + + Compute the absolute value of bytes in zmm2/m512 and store UNSIGNED result in zmm1 using writemask k1. 
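Several EVEX forms above, such as VORPS with an m32bcst source, let a single 32-bit element read from memory stand in for the whole vector operand. A sketch of that embedded-broadcast step, assuming the lane count is known from the encoding (broadcast_m32 is an illustrative name):

def broadcast_m32(value, lanes):
    # EVEX embedded broadcast: a single dword from memory is replicated
    # into every lane of the source vector before the operation runs.
    return [value] * lanes

# VORPS zmm, zmm, m32bcst works on 16 single-precision lanes, so the one
# memory dword is repeated 16 times and then ORed lane by lane.
src2 = broadcast_m32(0x3F800000, 16)
print(len(src2), hex(src2[0]))  # 16 0x3f800000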
+ + + VPABSW + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F38.WIG 1D /r + + AVX512VL + AVX512BW + + Compute the absolute value of 16-bit integers in xmm2/m128 and store UNSIGNED result in xmm1 using writemask k1. + + + VPABSW + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F38.WIG 1D /r + + AVX512VL + AVX512BW + + Compute the absolute value of 16-bit integers in ymm2/m256 and store UNSIGNED result in ymm1 using writemask k1. + + + VPABSW + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F38.WIG 1D /r + + AVX512BW + + Compute the absolute value of 16-bit integers in zmm2/m512 and store UNSIGNED result in zmm1 using writemask k1. + + + VPABSD + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 1E /r + + AVX512VL + AVX512F + + Compute the absolute value of 32-bit integers in xmm2/m128/m32bcst and store UNSIGNED result in xmm1 using writemask k1. + + + VPABSD + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 1E /r + + AVX512VL + AVX512F + + Compute the absolute value of 32-bit integers in ymm2/m256/m32bcst and store UNSIGNED result in ymm1 using writemask k1. + + + VPABSD + zmm1 {k1}{z},zmm2/m512/m32bcst + EVEX.512.66.0F38.W0 1E /r + + AVX512F + + Compute the absolute value of 32-bit integers in zmm2/m512/m32bcst and store UNSIGNED result in zmm1 using writemask k1. + + + VPABSQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 1F /r + + AVX512VL + AVX512F + + Compute the absolute value of 64-bit integers in xmm2/m128/m64bcst and store UNSIGNED result in xmm1 using writemask k1. + + + VPABSQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 1F /r + + AVX512VL + AVX512F + + Compute the absolute value of 64-bit integers in ymm2/m256/m64bcst and store UNSIGNED result in ymm1 using writemask k1. + + + VPABSQ + zmm1 {k1}{z},zmm2/m512/m64bcst + EVEX.512.66.0F38.W1 1F /r + + AVX512F + + Compute the absolute value of 64-bit integers in zmm2/m512/m64bcst and store UNSIGNED result in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PACKSSWB/PACKSSDW--Pack with Signed Saturation. + + PACKSSWB + xmm1,xmm2/m128 + 66 0F 63 /r + + SSE2 + + Converts 8 packed signed word integers from xmm1 and from xxm2/m128 into 16 packed signed byte integers in xmm1 using signed saturation. + + + PACKSSDW + xmm1,xmm2/m128 + 66 0F 6B /r + + SSE2 + + Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed word integers in xmm1 using signed saturation. + + + VPACKSSWB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 63 /r + + AVX + + Converts 8 packed signed word integers from xmm2 and from xmm3/m128 into 16 packed signed byte integers in xmm1 using signed saturation. + + + VPACKSSDW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 6B /r + + AVX + + Converts 4 packed signed doubleword integers from xmm2 and from xmm3/m128 into 8 packed signed word integers in xmm1 using signed saturation. + + + VPACKSSWB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 63 /r + + AVX2 + + Converts 16 packed signed word integers from ymm2 and from ymm3/m256 into 32 packed signed byte integers in ymm1 using signed saturation. + + + VPACKSSDW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 6B /r + + AVX2 + + Converts 8 packed signed doubleword integers from ymm2 and from ymm3/m256 into 16 packed signed word integers in ymm1 using signed saturation. 
+ + + VPACKSSWB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 63 /r + + AVX512VL + AVX512BW + + Converts packed signed word integers from xmm2 and from xmm3/m128 into packed signed byte integers in xmm1 using signed saturation under writemask k1. + + + VPACKSSWB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 63 /r + + AVX512VL + AVX512BW + + Converts packed signed word integers from ymm2 and from ymm3/m256 into packed signed byte integers in ymm1 using signed saturation under writemask k1. + + + VPACKSSWB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 63 /r + + AVX512BW + + Converts packed signed word integers from zmm2 and from zmm3/m512 into packed signed byte integers in zmm1 using signed saturation under writemask k1. + + + VPACKSSDW + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 6B /r + + AVX512VL + AVX512BW + + Converts packed signed doubleword integers from xmm2 and from xmm3/m128/m32bcst into packed signed word integers in xmm1 using signed saturation under writemask k1. + + + VPACKSSDW + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 6B /r + + AVX512VL + AVX512BW + + Converts packed signed doubleword integers from ymm2 and from ymm3/m256/m32bcst into packed signed word integers in ymm1 using signed saturation under writemask k1. + + + VPACKSSDW + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 6B /r + + AVX512BW + + Converts packed signed doubleword integers from zmm2 and from zmm3/m512/m32bcst into packed signed word integers in zmm1 using signed saturation under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PACKUSDW--Pack with Unsigned Saturation. + + PACKUSDW + xmm1,xmm2/m128 + 66 0F 38 2B /r + + SSE4_1 + + Convert 4 packed signed doubleword integers from xmm1 and 4 packed signed doubleword integers from xmm2/m128 into 8 packed unsigned word integers in xmm1 using unsigned saturation. + + + VPACKUSDW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 2B /r + + AVX + + Convert 4 packed signed doubleword integers from xmm2 and 4 packed signed doubleword integers from xmm3/m128 into 8 packed unsigned word integers in xmm1 using unsigned saturation. + + + VPACKUSDW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 2B /r + + AVX2 + + Convert 8 packed signed doubleword integers from ymm2 and 8 packed signed doubleword integers from ymm3/m256 into 16 packed unsigned word integers in ymm1 using unsigned saturation. + + + VPACKUSDW + xmm1{k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 2B /r + + AVX512VL + AVX512BW + + Convert packed signed doubleword integers from xmm2 and packed signed doubleword integers from xmm3/m128/m32bcst into packed unsigned word integers in xmm1 using unsigned saturation under writemask k1. + + + VPACKUSDW + ymm1{k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 2B /r + + AVX512VL + AVX512BW + + Convert packed signed doubleword integers from ymm2 and packed signed doubleword integers from ymm3/m256/m32bcst into packed unsigned word integers in ymm1 using unsigned saturation under writemask k1. + + + VPACKUSDW + zmm1{k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 2B /r + + AVX512BW + + Convert packed signed doubleword integers from zmm2 and packed signed doubleword integers from zmm3/m512/m32bcst into packed unsigned word integers in zmm1 using unsigned saturation under writemask k1. 
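The PACKSSWB/PACKSSDW forms above narrow each element using signed saturation. A small sketch of that clamping step, independent of register width (saturate_signed is an illustrative helper):

def saturate_signed(value, bits):
    # Clamp a signed integer into the signed range of the narrower type;
    # bits=8 clamps into [-128, 127], as PACKSSWB does per word element.
    lo, hi = -(1 << (bits - 1)), (1 << (bits - 1)) - 1
    return max(lo, min(hi, value))

print([saturate_signed(v, 8) for v in (-300, -5, 0, 127, 200)])  # [-128, -5, 0, 127, 127]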
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PACKUSWB--Pack with Unsigned Saturation. + + PACKUSWB + xmm1,xmm2/m128 + 66 0F 67 /r + + SSE2 + + Converts 8 signed word integers from xmm1 and 8 signed word integers from xmm2/m128 into 16 unsigned byte integers in xmm1 using unsigned saturation. + + + VPACKUSWB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F 67 /r + + AVX + + Converts 8 signed word integers from xmm2 and 8 signed word integers from xmm3/m128 into 16 unsigned byte integers in xmm1 using unsigned saturation. + + + VPACKUSWB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F 67 /r + + AVX2 + + Converts 16 signed word integers from ymm2 and 16 signed word integers from ymm3/m256 into 32 unsigned byte integers in ymm1 using unsigned saturation. + + + VPACKUSWB + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 67 /r + + AVX512VL + AVX512BW + + Converts signed word integers from xmm2 and signed word integers from xmm3/m128 into unsigned byte integers in xmm1 using unsigned saturation under writemask k1. + + + VPACKUSWB + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 67 /r + + AVX512VL + AVX512BW + + Converts signed word integers from ymm2 and signed word integers from ymm3/m256 into unsigned byte integers in ymm1 using unsigned saturation under writemask k1. + + + VPACKUSWB + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 67 /r + + AVX512BW + + Converts signed word integers from zmm2 and signed word integers from zmm3/m512 into unsigned byte integers in zmm1 using unsigned saturation under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PADDB/PADDW/PADDD/PADDQ--Add Packed Integers. + + PADDB + xmm1,xmm2/m128 + 66 0F FC /r + + SSE2 + + Add packed byte integers from xmm2/m128 and xmm1. + + + PADDW + xmm1,xmm2/m128 + 66 0F FD /r + + SSE2 + + Add packed word integers from xmm2/m128 and xmm1. + + + PADDD + xmm1,xmm2/m128 + 66 0F FE /r + + SSE2 + + Add packed doubleword integers from xmm2/m128 and xmm1. + + + PADDQ + xmm1,xmm2/m128 + 66 0F D4 /r + + SSE2 + + Add packed quadword integers from xmm2/m128 and xmm1. + + + VPADDB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FC /r + + AVX + + Add packed byte integers from xmm2, and xmm3/m128 and store in xmm1. + + + VPADDW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FD /r + + AVX + + Add packed word integers from xmm2, xmm3/m128 and store in xmm1. + + + VPADDD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FE /r + + AVX + + Add packed doubleword integers from xmm2, xmm3/m128 and store in xmm1. + + + VPADDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D4 /r + + AVX + + Add packed quadword integers from xmm2, xmm3/m128 and store in xmm1. + + + VPADDB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FC /r + + AVX2 + + Add packed byte integers from ymm2, and ymm3/m256 and store in ymm1. + + + VPADDW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FD /r + + AVX2 + + Add packed word integers from ymm2, ymm3/m256 and store in ymm1. + + + VPADDD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FE /r + + AVX2 + + Add packed doubleword integers from ymm2, ymm3/m256 and store in ymm1. + + + VPADDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG D4 /r + + AVX2 + + Add packed quadword integers from ymm2, ymm3/m256 and store in ymm1. 
+ + + VPADDB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG FC /r + + AVX512VL + AVX512BW + + Add packed byte integers from xmm2, and xmm3/m128 and store in xmm1 using writemask k1. + + + VPADDW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG FD /r + + AVX512VL + AVX512BW + + Add packed word integers from xmm2, and xmm3/m128 and store in xmm1 using writemask k1. + + + VPADDD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 FE /r + + AVX512VL + AVX512F + + Add packed doubleword integers from xmm2, and xmm3/m128/m32bcst and store in xmm1 using writemask k1. + + + VPADDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 D4 /r + + AVX512VL + AVX512F + + Add packed quadword integers from xmm2, and xmm3/m128/m64bcst and store in xmm1 using writemask k1. + + + VPADDB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG FC /r + + AVX512VL + AVX512BW + + Add packed byte integers from ymm2, and ymm3/m256 and store in ymm1 using writemask k1. + + + VPADDW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG FD /r + + AVX512VL + AVX512BW + + Add packed word integers from ymm2, and ymm3/m256 and store in ymm1 using writemask k1. + + + VPADDD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 FE /r + + AVX512VL + AVX512F + + Add packed doubleword integers from ymm2, ymm3/m256/m32bcst and store in ymm1 using writemask k1. + + + VPADDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 D4 /r + + AVX512VL + AVX512F + + Add packed quadword integers from ymm2, ymm3/m256/m64bcst and store in ymm1 using writemask k1. + + + VPADDB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG FC /r + + AVX512BW + + Add packed byte integers from zmm2, and zmm3/m512 and store in zmm1 using writemask k1. + + + VPADDW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG FD /r + + AVX512BW + + Add packed word integers from zmm2, and zmm3/m512 and store in zmm1 using writemask k1. + + + VPADDD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 FE /r + + AVX512F + + Add packed doubleword integers from zmm2, zmm3/m512/m32bcst and store in zmm1 using writemask k1. + + + VPADDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 D4 /r + + AVX512F + + Add packed quadword integers from zmm2, zmm3/m512/m64bcst and store in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PADDSB/PADDSW--Add Packed Signed Integers with Signed Saturation. + + PADDSB + xmm1,xmm2/m128 + 66 0F EC /r + + SSE2 + + Add packed signed byte integers from xmm2/m128 and xmm1 and saturate the results. + + + PADDSW + xmm1,xmm2/m128 + 66 0F ED /r + + SSE2 + + Add packed signed word integers from xmm2/m128 and xmm1 and saturate the results. + + + VPADDSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F EC + + AVX + + Add packed signed byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1. + + + VPADDSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F ED + + AVX + + Add packed signed word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1. + + + VPADDSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F EC + + AVX2 + + Add packed signed byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. 
+ + + VPADDSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F ED + + AVX2 + + Add packed signed word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + VPADDSB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG EC /r + + AVX512VL + AVX512BW + + Add packed signed byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1. + + + VPADDSB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG EC /r + + AVX512VL + AVX512BW + + Add packed signed byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1. + + + VPADDSB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG EC /r + + AVX512BW + + Add packed signed byte integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1. + + + VPADDSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG ED /r + + AVX512VL + AVX512BW + + Add packed signed word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1. + + + VPADDSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG ED /r + + AVX512VL + AVX512BW + + Add packed signed word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1. + + + VPADDSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG ED /r + + AVX512BW + + Add packed signed word integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PADDUSB/PADDUSW--Add Packed Unsigned Integers with Unsigned Saturation. + + PADDUSB + xmm1,xmm2/m128 + 66 0F DC /r + + SSE2 + + Add packed unsigned byte integers from xmm2/m128 and xmm1 and saturate the results. + + + PADDUSW + xmm1,xmm2/m128 + 66 0F DD /r + + SSE2 + + Add packed unsigned word integers from xmm2/m128 and xmm1 and saturate the results. + + + VPADDUSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F DC + + AVX + + Add packed unsigned byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1. + + + VPADDUSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F DD + + AVX + + Add packed unsigned word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1. + + + VPADDUSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F DC + + AVX2 + + Add packed unsigned byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + VPADDUSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F DD + + AVX2 + + Add packed unsigned word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + VPADDUSB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG DC /r + + AVX512VL + AVX512BW + + Add packed unsigned byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1. + + + VPADDUSB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG DC /r + + AVX512VL + AVX512BW + + Add packed unsigned byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1. + + + VPADDUSB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG DC /r + + AVX512BW + + Add packed unsigned byte integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1. + + + VPADDUSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG DD /r + + AVX512VL + AVX512BW + + Add packed unsigned word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1. 
+ + + VPADDUSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG DD /r + + AVX512VL + AVX512BW + + Add packed unsigned word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1. + + + VPADDUSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG DD /r + + AVX512BW + + Add packed unsigned word integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PALIGNR--Byte Align. + + PALIGNR + xmm1,xmm2/m128,imm8 + 66 0F 3A 0F /r ib + + SSSE3 + + Concatenate destination and source operands, extract byte aligned result shifted to the right by constant value in imm8 and result is stored in xmm1. + + + VPALIGNR + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A 0F /r ib + + AVX + + Concatenate xmm2 and xmm3/m128 into a 32-byte intermediate result, extract byte aligned result shifted to the right by constant value in imm8 and result is stored in xmm1. + + + VPALIGNR + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A 0F /r ib + + AVX2 + + Concatenate pairs of 16 bytes in ymm2 and ymm3/m256 into 32-byte intermediate result, extract byte-aligned, 16-byte result shifted to the right by constant values in imm8 from each intermediate result, and two 16-byte results are stored in ymm1. + + + VPALIGNR + xmm1 {k1}{z},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.WIG 0F /r ib + + AVX512VL + AVX512BW + + Concatenate xmm2 and xmm3/m128 into a 32-byte intermediate result, extract byte aligned result shifted to the right by constant value in imm8 and result is stored in xmm1. + + + VPALIGNR + ymm1 {k1}{z},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.WIG 0F /r ib + + AVX512VL + AVX512BW + + Concatenate pairs of 16 bytes in ymm2 and ymm3/m256 into 32-byte intermediate result, extract byte-aligned, 16-byte result shifted to the right by constant values in imm8 from each intermediate result, and two 16-byte results are stored in ymm1. + + + VPALIGNR + zmm1 {k1}{z},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.WIG 0F /r ib + + AVX512BW + + Concatenate pairs of 16 bytes in zmm2 and zmm3/m512 into 32-byte intermediate result, extract byte-aligned, 16-byte result shifted to the right by constant values in imm8 from each intermediate result, and four 16-byte results are stored in zmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PAND--Logical AND. + + PAND + xmm1,xmm2/m128 + 66 0F DB /r + + SSE2 + + Bitwise AND of xmm2/m128 and xmm1. + + + VPAND + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG DB /r + + AVX + + Bitwise AND of xmm2, and xmm3/m128 and store result in xmm1. + + + VPAND + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG DB /r + + AVX2 + + Bitwise AND of ymm2, and ymm3/m256 and store result in ymm1. + + + VPANDD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 DB /r + + AVX512VL + AVX512F + + Bitwise AND of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and store result in xmm1 using writemask k1. + + + VPANDD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 DB /r + + AVX512VL + AVX512F + + Bitwise AND of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and store result in ymm1 using writemask k1. 
+ + + VPANDD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 DB /r + + AVX512F + + Bitwise AND of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and store result in zmm1 using writemask k1. + + + VPANDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 DB /r + + AVX512VL + AVX512F + + Bitwise AND of packed quadword integers in xmm2 and xmm3/m128/m64bcst and store result in xmm1 using writemask k1. + + + VPANDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 DB /r + + AVX512VL + AVX512F + + Bitwise AND of packed quadword integers in ymm2 and ymm3/m256/m64bcst and store result in ymm1 using writemask k1. + + + VPANDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 DB /r + + AVX512F + + Bitwise AND of packed quadword integers in zmm2 and zmm3/m512/m64bcst and store result in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PANDN--Logical AND NOT. + + PANDN + xmm1,xmm2/m128 + 66 0F DF /r + + SSE2 + + Bitwise AND NOT of xmm2/m128 and xmm1. + + + VPANDN + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG DF /r + + AVX + + Bitwise AND NOT of xmm2, and xmm3/m128 and store result in xmm1. + + + VPANDN + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG DF /r + + AVX2 + + Bitwise AND NOT of ymm2, and ymm3/m256 and store result in ymm1. + + + VPANDND + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 DF /r + + AVX512VL + AVX512F + + Bitwise AND NOT of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and store result in xmm1 using writemask k1. + + + VPANDND + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 DF /r + + AVX512VL + AVX512F + + Bitwise AND NOT of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and store result in ymm1 using writemask k1. + + + VPANDND + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 DF /r + + AVX512F + + Bitwise AND NOT of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and store result in zmm1 using writemask k1. + + + VPANDNQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 DF /r + + AVX512VL + AVX512F + + Bitwise AND NOT of packed quadword integers in xmm2 and xmm3/m128/m64bcst and store result in xmm1 using writemask k1. + + + VPANDNQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 DF /r + + AVX512VL + AVX512F + + Bitwise AND NOT of packed quadword integers in ymm2 and ymm3/m256/m64bcst and store result in ymm1 using writemask k1. + + + VPANDNQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 DF /r + + AVX512F + + Bitwise AND NOT of packed quadword integers in zmm2 and zmm3/m512/m64bcst and store result in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PAVGB/PAVGW--Average Packed Integers. + + PAVGB + xmm1,xmm2/m128 + 66 0F E0,/r + + SSE2 + + Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding. + + + PAVGW + xmm1,xmm2/m128 + 66 0F E3,/r + + SSE2 + + Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding. + + + VPAVGB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E0 + + AVX + + Average packed unsigned byte integers from xmm2, and xmm3/m128 with rounding and store to xmm1. 
+ + + VPAVGW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E3 + + AVX + + Average packed unsigned word integers from xmm2, xmm3/m128 with rounding to xmm1. + + + VPAVGB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E0 + + AVX2 + + Average packed unsigned byte integers from ymm2, and ymm3/m256 with rounding and store to ymm1. + + + VPAVGW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E3 + + AVX2 + + Average packed unsigned word integers from ymm2, ymm3/m256 with rounding to ymm1. + + + VPAVGB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E0 /r + + AVX512VL + AVX512BW + + Average packed unsigned byte integers from xmm2, and xmm3/m128 with rounding and store to xmm1 under writemask k1. + + + VPAVGB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E0 /r + + AVX512VL + AVX512BW + + Average packed unsigned byte integers from ymm2, and ymm3/m256 with rounding and store to ymm1 under writemask k1. + + + VPAVGB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E0 /r + + AVX512BW + + Average packed unsigned byte integers from zmm2, and zmm3/m512 with rounding and store to zmm1 under writemask k1. + + + VPAVGW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E3 /r + + AVX512VL + AVX512BW + + Average packed unsigned word integers from xmm2, xmm3/m128 with rounding to xmm1 under writemask k1. + + + VPAVGW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E3 /r + + AVX512VL + AVX512BW + + Average packed unsigned word integers from ymm2, ymm3/m256 with rounding to ymm1 under writemask k1. + + + VPAVGW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E3 /r + + AVX512BW + + Average packed unsigned word integers from zmm2, zmm3/m512 with rounding to zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPBROADCASTM--Broadcast Mask to Vector Register. + + VPBROADCASTMB2Q + xmm1,k1 + EVEX.128.F3.0F38.W1 2A /r + + AVX512VL + AVX512CD + + Broadcast low byte value in k1 to two locations in xmm1. + + + VPBROADCASTMB2Q + ymm1,k1 + EVEX.256.F3.0F38.W1 2A /r + + AVX512VL + AVX512CD + + Broadcast low byte value in k1 to four locations in ymm1. + + + VPBROADCASTMB2Q + zmm1,k1 + EVEX.512.F3.0F38.W1 2A /r + + AVX512CD + + Broadcast low byte value in k1 to eight locations in zmm1. + + + VPBROADCASTMW2D + xmm1,k1 + EVEX.128.F3.0F38.W0 3A /r + + AVX512VL + AVX512CD + + Broadcast low word value in k1 to four locations in xmm1. + + + VPBROADCASTMW2D + ymm1,k1 + EVEX.256.F3.0F38.W0 3A /r + + AVX512VL + AVX512CD + + Broadcast low word value in k1 to eight locations in ymm1. + + + VPBROADCASTMW2D + zmm1,k1 + EVEX.512.F3.0F38.W0 3A /r + + AVX512CD + + Broadcast low word value in k1 to sixteen locations in zmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PCMPEQB/PCMPEQW/PCMPEQD/PCMPEQQ--Compare Packed Integers for Equality. + + PCMPEQB + xmm1,xmm2/m128 + 66 0F 74 /r + + SSE2 + + Compare packed bytes in xmm2/m128 and xmm1 for equality. + + + PCMPEQW + xmm1,xmm2/m128 + 66 0F 75 /r + + SSE2 + + Compare packed words in xmm2/m128 and xmm1 for equality. + + + PCMPEQD + xmm1,xmm2/m128 + 66 0F 76 /r + + SSE2 + + Compare packed doublewords in xmm2/m128 and xmm1 for equality. + + + PCMPEQQ + xmm1,xmm2/m128 + 66 0F 38 29 /r + + SSE4_1 + + Compare packed quadwords in xmm2/m128 and xmm1 for equality. + + + VPCMPEQB + xmm1,xmm2,xmm3 /m128 + VEX.NDS.128.66.0F.WIG 74 /r + + AVX + + Compare packed bytes in xmm3/m128 and xmm2 for equality. 
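The PAVGB/PAVGW descriptions above say the unsigned averages are formed "with rounding"; that is commonly implemented as (a + b + 1) >> 1 evaluated at full precision, which is what the sketch below assumes (check the SDM pseudocode before relying on it):

def pavg(a, b):
    # Unsigned average with rounding: the +1 makes halfway cases round up,
    # matching the "with rounding" wording in the entries above.
    return [(x + y + 1) >> 1 for x, y in zip(a, b)]

print(pavg([0, 1, 254, 255], [0, 2, 255, 255]))  # [0, 2, 255, 255]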
+ + + VPCMPEQW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 75 /r + + AVX + + Compare packed words in xmm3/m128 and xmm2 for equality. + + + VPCMPEQD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 76 /r + + AVX + + Compare packed doublewords in xmm3/m128 and xmm2 for equality. + + + VPCMPEQQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 29 /r + + AVX + + Compare packed quadwords in xmm3/m128 and xmm2 for equality. + + + VPCMPEQB + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F.WIG 74 /r + + AVX2 + + Compare packed bytes in ymm3/m256 and ymm2 for equality. + + + VPCMPEQW + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F.WIG 75 /r + + AVX2 + + Compare packed words in ymm3/m256 and ymm2 for equality. + + + VPCMPEQD + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F.WIG 76 /r + + AVX2 + + Compare packed doublewords in ymm3/m256 and ymm2 for equality. + + + VPCMPEQQ + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F38.WIG 29 /r + + AVX2 + + Compare packed quadwords in ymm3/m256 and ymm2 for equality. + + + VPCMPEQD + k1 {k2},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 76 /r + + AVX512VL + AVX512F + + Compare Equal between int32 vector xmm2 and int32 vector xmm3/m128/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQD + k1 {k2},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 76 /r + + AVX512VL + AVX512F + + Compare Equal between int32 vector ymm2 and int32 vector ymm3/m256/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQD + k1 {k2},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 76 /r + + AVX512F + + Compare Equal between int32 vectors in zmm2 and zmm3/m512/m32bcst, and set destination k1 according to the comparison results under writemask k2,. + + + VPCMPEQQ + k1 {k2},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 29 /r + + AVX512VL + AVX512F + + Compare Equal between int64 vector xmm2 and int64 vector xmm3/m128/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQQ + k1 {k2},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 29 /r + + AVX512VL + AVX512F + + Compare Equal between int64 vector ymm2 and int64 vector ymm3/m256/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQQ + k1 {k2},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 29 /r + + AVX512F + + Compare Equal between int64 vector zmm2 and int64 vector zmm3/m512/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQB + k1 {k2},xmm2,xmm3 /m128 + EVEX.NDS.128.66.0F.WIG 74 /r + + AVX512VL + AVX512BW + + Compare packed bytes in xmm3/m128 and xmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQB + k1 {k2},ymm2,ymm3 /m256 + EVEX.NDS.256.66.0F.WIG 74 /r + + AVX512VL + AVX512BW + + Compare packed bytes in ymm3/m256 and ymm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQB + k1 {k2},zmm2,zmm3 /m512 + EVEX.NDS.512.66.0F.WIG 74 /r + + AVX512BW + + Compare packed bytes in zmm3/m512 and zmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. 
+ + + VPCMPEQW + k1 {k2},xmm2,xmm3 /m128 + EVEX.NDS.128.66.0F.WIG 75 /r + + AVX512VL + AVX512BW + + Compare packed words in xmm3/m128 and xmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQW + k1 {k2},ymm2,ymm3 /m256 + EVEX.NDS.256.66.0F.WIG 75 /r + + AVX512VL + AVX512BW + + Compare packed words in ymm3/m256 and ymm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPEQW + k1 {k2},zmm2,zmm3 /m512 + EVEX.NDS.512.66.0F.WIG 75 /r + + AVX512BW + + Compare packed words in zmm3/m512 and zmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PCMPGTB/PCMPGTW/PCMPGTD/PCMPGTQ--Compare Packed Integers for Greater Than. + + PCMPGTB + xmm1,xmm2/m128 + 66 0F 64 /r + + SSE2 + + Compare packed signed byte integers in xmm1 and xmm2/m128 for greater than. + + + PCMPGTW + xmm1,xmm2/m128 + 66 0F 65 /r + + SSE2 + + Compare packed signed word integers in xmm1 and xmm2/m128 for greater than. + + + PCMPGTD + xmm1,xmm2/m128 + 66 0F 66 /r + + SSE2 + + Compare packed signed doubleword integers in xmm1 and xmm2/m128 for greater than. + + + PCMPGTQ + xmm1,xmm2/m128 + 66 0F 38 37 /r + + SSE4_2 + + Compare packed qwords in xmm2/m128 and xmm1 for greater than. + + + VPCMPGTB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 64 /r + + AVX + + Compare packed signed byte integers in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 65 /r + + AVX + + Compare packed signed word integers in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 66 /r + + AVX + + Compare packed signed doubleword integers in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 37 /r + + AVX + + Compare packed signed qwords in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 64 /r + + AVX2 + + Compare packed signed byte integers in ymm2 and ymm3/m256 for greater than. + + + VPCMPGTW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 65 /r + + AVX2 + + Compare packed signed word integers in ymm2 and ymm3/m256 for greater than. + + + VPCMPGTD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 66 /r + + AVX2 + + Compare packed signed doubleword integers in ymm2 and ymm3/m256 for greater than. + + + VPCMPGTQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 37 /r + + AVX2 + + Compare packed signed qwords in ymm2 and ymm3/m256 for greater than. + + + VPCMPGTD + k1 {k2},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 66 /r + + AVX512VL + AVX512F + + Compare Greater between int32 vector xmm2 and int32 vector xmm3/m128/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTD + k1 {k2},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 66 /r + + AVX512VL + AVX512F + + Compare Greater between int32 vector ymm2 and int32 vector ymm3/m256/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. 
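Unlike their VEX counterparts, the EVEX VPCMPEQ* and VPCMPGT* forms above produce one bit per element in an opmask register (k1), optionally filtered through a second mask (k2). A compact sketch of that result shape (compare_into_mask is an illustrative name):

def compare_into_mask(a, b, predicate, k2=None):
    # Opmask result: bit i is set when predicate(a[i], b[i]) holds and the
    # matching bit of the writemask k2, if supplied, is also set.
    mask = 0
    for i, (x, y) in enumerate(zip(a, b)):
        if predicate(x, y) and (k2 is None or (k2 >> i) & 1):
            mask |= 1 << i
    return mask

# VPCMPEQD-style equality over 4 dword lanes, with lane 2 masked off by k2:
print(bin(compare_into_mask([1, 2, 3, 4], [1, 0, 3, 4], lambda x, y: x == y, k2=0b1011)))  # 0b1001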
+ + + VPCMPGTD + k1 {k2},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 66 /r + + AVX512F + + Compare Greater between int32 elements in zmm2 and zmm3/m512/m32bcst, and set destination k1 according to the comparison results under writemask. k2. + + + VPCMPGTQ + k1 {k2},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 37 /r + + AVX512VL + AVX512F + + Compare Greater between int64 vector xmm2 and int64 vector xmm3/m128/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTQ + k1 {k2},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 37 /r + + AVX512VL + AVX512F + + Compare Greater between int64 vector ymm2 and int64 vector ymm3/m256/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTQ + k1 {k2},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 37 /r + + AVX512F + + Compare Greater between int64 vector zmm2 and int64 vector zmm3/m512/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTB + k1 {k2},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 64 /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in xmm2 and xmm3/m128 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTB + k1 {k2},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 64 /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in ymm2 and ymm3/m256 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTB + k1 {k2},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 64 /r + + AVX512BW + + Compare packed signed byte integers in zmm2 and zmm3/m512 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTW + k1 {k2},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 65 /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in xmm2 and xmm3/m128 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTW + k1 {k2},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 65 /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in ymm2 and ymm3/m256 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + VPCMPGTW + k1 {k2},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 65 /r + + AVX512BW + + Compare packed signed word integers in zmm2 and zmm3/m512 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPCMPB/VPCMPUB--Compare Packed Byte Values Into Mask. + + VPCMPB + k1 {k2},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.W0 3F /r ib + + AVX512VL + AVX512BW + + Compare packed signed byte values in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. 
+ + + VPCMPB + k1 {k2},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.W0 3F /r ib + + AVX512VL + AVX512BW + + Compare packed signed byte values in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPB + k1 {k2},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.W0 3F /r ib + + AVX512BW + + Compare packed signed byte values in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUB + k1 {k2},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.W0 3E /r ib + + AVX512VL + AVX512BW + + Compare packed unsigned byte values in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUB + k1 {k2},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.W0 3E /r ib + + AVX512VL + AVX512BW + + Compare packed unsigned byte values in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUB + k1 {k2},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.W0 3E /r ib + + AVX512BW + + Compare packed unsigned byte values in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(w) + vvvv(r) + ModRM:r/m(r) + NA + + + + VPCMPD/VPCMPUD--Compare Packed Integer Values into Mask. + + VPCMPD + k1 {k2},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.66.0F3A.W0 1F /r ib + + AVX512VL + AVX512F + + Compare packed signed doubleword integer values in xmm3/m128/m32bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPD + k1 {k2},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 1F /r ib + + AVX512VL + AVX512F + + Compare packed signed doubleword integer values in ymm3/m256/m32bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPD + k1 {k2},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.66.0F3A.W0 1F /r ib + + AVX512F + + Compare packed signed doubleword integer values in zmm2 and zmm3/m512/m32bcst using bits 2:0 of imm8 as a comparison predicate. The comparison results are written to the destination k1 under writemask k2. + + + VPCMPUD + k1 {k2},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.66.0F3A.W0 1E /r ib + + AVX512VL + AVX512F + + Compare packed unsigned doubleword integer values in xmm3/m128/m32bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUD + k1 {k2},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 1E /r ib + + AVX512VL + AVX512F + + Compare packed unsigned doubleword integer values in ymm3/m256/m32bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUD + k1 {k2},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.66.0F3A.W0 1E /r ib + + AVX512F + + Compare packed unsigned doubleword integer values in zmm2 and zmm3/m512/m32bcst using bits 2:0 of imm8 as a comparison predicate. The comparison results are written to the destination k1 under writemask k2. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VPCMPQ/VPCMPUQ--Compare Packed Integer Values into Mask. 
+ + VPCMPQ + k1 {k2},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F3A.W1 1F /r ib + + AVX512VL + AVX512F + + Compare packed signed quadword integer values in xmm3/m128/m64bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPQ + k1 {k2},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 1F /r ib + + AVX512VL + AVX512F + + Compare packed signed quadword integer values in ymm3/m256/m64bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPQ + k1 {k2},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F3A.W1 1F /r ib + + AVX512F + + Compare packed signed quadword integer values in zmm3/m512/m64bcst and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUQ + k1 {k2},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F3A.W1 1E /r ib + + AVX512VL + AVX512F + + Compare packed unsigned quadword integer values in xmm3/m128/m64bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUQ + k1 {k2},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 1E /r ib + + AVX512VL + AVX512F + + Compare packed unsigned quadword integer values in ymm3/m256/m64bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUQ + k1 {k2},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F3A.W1 1E /r ib + + AVX512F + + Compare packed unsigned quadword integer values in zmm3/m512/m64bcst and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VPCMPW/VPCMPUW--Compare Packed Word Values Into Mask. + + VPCMPW + k1 {k2},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.W1 3F /r ib + + AVX512VL + AVX512BW + + Compare packed signed word integers in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPW + k1 {k2},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.W1 3F /r ib + + AVX512VL + AVX512BW + + Compare packed signed word integers in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPW + k1 {k2},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.W1 3F /r ib + + AVX512BW + + Compare packed signed word integers in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUW + k1 {k2},xmm2,xmm3/m128,imm8 + EVEX.NDS.128.66.0F3A.W1 3E /r ib + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUW + k1 {k2},ymm2,ymm3/m256,imm8 + EVEX.NDS.256.66.0F3A.W1 3E /r ib + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. + + + VPCMPUW + k1 {k2},zmm2,zmm3/m512,imm8 + EVEX.NDS.512.66.0F3A.W1 3E /r ib + + AVX512BW + + Compare packed unsigned word integers in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1. 
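For the VPCMP*/VPCMPU* families above, bits 2:0 of imm8 choose the comparison predicate. The mapping below follows the usual AVX-512 integer predicate encoding; it is reproduced from memory rather than from this table, so verify it against the SDM before depending on it:

import operator

# imm8[2:0] -> predicate for VPCMPD-style signed compares.
PREDICATES = {
    0: operator.eq,            # EQ
    1: operator.lt,            # LT
    2: operator.le,            # LE
    3: lambda x, y: False,     # FALSE
    4: operator.ne,            # NEQ
    5: operator.ge,            # NLT
    6: operator.gt,            # NLE
    7: lambda x, y: True,      # TRUE
}

def vpcmp(a, b, imm8):
    # Apply the selected predicate lane by lane and pack the results
    # into an opmask, as the entry descriptions above state.
    pred = PREDICATES[imm8 & 0b111]
    mask = 0
    for i, (x, y) in enumerate(zip(a, b)):
        if pred(x, y):
            mask |= 1 << i
    return mask

print(bin(vpcmp([1, 5, 3], [2, 2, 3], 1)))  # imm8=1 selects LT -> 0b001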
+ + + ModRM:reg(w) + vvvv(r) + ModRM:r/m(r) + NA + + + + VPCOMPRESSD--Store Sparse Packed Doubleword Integer Values into Dense Memory/Register. + + VPCOMPRESSD + xmm1/m128 {k1}{z},xmm2 + EVEX.128.66.0F38.W0 8B /r + + AVX512VL + AVX512F + + Compress packed doubleword integer values from xmm2 to xmm1/m128 using controlmask k1. + + + VPCOMPRESSD + ymm1/m256 {k1}{z},ymm2 + EVEX.256.66.0F38.W0 8B /r + + AVX512VL + AVX512F + + Compress packed doubleword integer values from ymm2 to ymm1/m256 using controlmask k1. + + + VPCOMPRESSD + zmm1/m512 {k1}{z},zmm2 + EVEX.512.66.0F38.W0 8B /r + + AVX512F + + Compress packed doubleword integer values from zmm2 to zmm1/m512 using controlmask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPCOMPRESSQ--Store Sparse Packed Quadword Integer Values into Dense Memory/Register. + + VPCOMPRESSQ + xmm1/m128 {k1}{z},xmm2 + EVEX.128.66.0F38.W1 8B /r + + AVX512VL + AVX512F + + Compress packed quadword integer values from xmm2 to xmm1/m128 using controlmask k1. + + + VPCOMPRESSQ + ymm1/m256 {k1}{z},ymm2 + EVEX.256.66.0F38.W1 8B /r + + AVX512VL + AVX512F + + Compress packed quadword integer values from ymm2 to ymm1/m256 using controlmask k1. + + + VPCOMPRESSQ + zmm1/m512 {k1}{z},zmm2 + EVEX.512.66.0F38.W1 8B /r + + AVX512F + + Compress packed quadword integer values from zmm2 to zmm1/m512 using controlmask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPCONFLICTD/Q--Detect Conflicts Within a Vector of Packed Dword/Qword Values into Dense Memory/ Register. + + VPCONFLICTD + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 C4 /r + + AVX512VL + AVX512CD + + Detect duplicate double-word values in xmm2/m128/m32bcst using writemask k1. + + + VPCONFLICTD + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 C4 /r + + AVX512VL + AVX512CD + + Detect duplicate double-word values in ymm2/m256/m32bcst using writemask k1. + + + VPCONFLICTD + zmm1 {k1}{z},zmm2/m512/m32bcst + EVEX.512.66.0F38.W0 C4 /r + + AVX512CD + + Detect duplicate double-word values in zmm2/m512/m32bcst using writemask k1. + + + VPCONFLICTQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 C4 /r + + AVX512VL + AVX512CD + + Detect duplicate quad-word values in xmm2/m128/m64bcst using writemask k1. + + + VPCONFLICTQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 C4 /r + + AVX512VL + AVX512CD + + Detect duplicate quad-word values in ymm2/m256/m64bcst using writemask k1. + + + VPCONFLICTQ + zmm1 {k1}{z},zmm2/m512/m64bcst + EVEX.512.66.0F38.W1 C4 /r + + AVX512CD + + Detect duplicate quad-word values in zmm2/m512/m64bcst using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPERMB--Permute Packed Bytes Elements. + + VPERMB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W0 8D /r + + AVX512VL + AVX512VBMI + + Permute bytes in xmm3/m128 using byte indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W0 8D /r + + AVX512VL + AVX512VBMI + + Permute bytes in ymm3/m256 using byte indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W0 8D /r + + AVX512VBMI + + Permute bytes in zmm3/m512 using byte indexes in zmm2 and store the result in zmm1 using writemask k1. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VPERMD/VPERMW--Permute Packed Doublewords/Words Elements. 
+ + VPERMD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 36 /r + + AVX2 + + Permute doublewords in ymm3/m256 using indices in ymm2 and store the result in ymm1. + + + VPERMD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 36 /r + + AVX512VL + AVX512F + + Permute doublewords in ymm3/m256/m32bcst using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 36 /r + + AVX512F + + Permute doublewords in zmm3/m512/m32bcst using indices in zmm2 and store the result in zmm1 using writemask k1. + + + VPERMW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 8D /r + + AVX512VL + AVX512BW + + Permute word integers in xmm3/m128 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 8D /r + + AVX512VL + AVX512BW + + Permute word integers in ymm3/m256 using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 8D /r + + AVX512BW + + Permute word integers in zmm3/m512 using indexes in zmm2 and store the result in zmm1 using writemask k1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VPERMI2B--Full Permute of Bytes From Two Tables Overwriting the Index. + + VPERMI2B + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.DDS.128.66.0F38.W0 75 /r + + AVX512VL + AVX512VBMI + + Permute bytes in xmm3/m128 and xmm2 using byte indexes in xmm1 and store the byte results in xmm1 using writemask k1. + + + VPERMI2B + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.DDS.256.66.0F38.W0 75 /r + + AVX512VL + AVX512VBMI + + Permute bytes in ymm3/m256 and ymm2 using byte indexes in ymm1 and store the byte results in ymm1 using writemask k1. + + + VPERMI2B + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.DDS.512.66.0F38.W0 75 /r + + AVX512VBMI + + Permute bytes in zmm3/m512 and zmm2 using byte indexes in zmm1 and store the byte results in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VPERMI2W/D/Q/PS/PD--Full Permute From Two Tables Overwriting the Index. + + VPERMI2W + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.DDS.128.66.0F38.W1 75 /r + + AVX512VL + AVX512BW + + Permute word integers from two tables in xmm3/m128 and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1. + + + VPERMI2W + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.DDS.256.66.0F38.W1 75 /r + + AVX512VL + AVX512BW + + Permute word integers from two tables in ymm3/m256 and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1. + + + VPERMI2W + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.DDS.512.66.0F38.W1 75 /r + + AVX512BW + + Permute word integers from two tables in zmm3/m512 and zmm2 using indexes in zmm1 and store the result in zmm1 using writemask k1. + + + VPERMI2D + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 76 /r + + AVX512VL + AVX512F + + Permute double-words from two tables in xmm3/m128/m32bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1. + + + VPERMI2D + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 76 /r + + AVX512VL + AVX512F + + Permute double-words from two tables in ymm3/m256/m32bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1. 
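The VPERMD/VPERMW entries above describe a full lane permute driven by per-lane indices taken from the second source. A sketch over eight dword lanes, the ymm case (vperm is an illustrative helper; the modulo stands in for the hardware using only the low index bits):

def vperm(indices, table):
    # Destination lane i receives table[indices[i] mod lane_count].
    n = len(table)
    return [table[idx % n] for idx in indices]

src = [10, 11, 12, 13, 14, 15, 16, 17]   # ymm3: 8 dword lanes of data
idx = [7, 0, 7, 0, 3, 3, 9, 1]           # ymm2: indices (9 wraps to 1)
print(vperm(idx, src))                   # [17, 10, 17, 10, 13, 13, 11, 11]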
+ + + VPERMI2D + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.DDS.512.66.0F38.W0 76 /r + + AVX512F + + Permute double-words from two tables in zmm3/m512/m32bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1. + + + VPERMI2Q + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 76 /r + + AVX512VL + AVX512F + + Permute quad-words from two tables in xmm3/m128/m64bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1. + + + VPERMI2Q + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 76 /r + + AVX512VL + AVX512F + + Permute quad-words from two tables in ymm3/m256/m64bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1. + + + VPERMI2Q + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 76 /r + + AVX512F + + Permute quad-words from two tables in zmm3/m512/m64bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1. + + + VPERMI2PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 77 /r + + AVX512VL + AVX512F + + Permute single-precision FP values from two tables in xmm3/m128/m32bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1. + + + VPERMI2PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 77 /r + + AVX512VL + AVX512F + + Permute single-precision FP values from two tables in ymm3/m256/m32bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1. + + + VPERMI2PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.DDS.512.66.0F38.W0 77 /r + + AVX512F + + Permute single-precision FP values from two tables in zmm3/m512/m32bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1. + + + VPERMI2PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 77 /r + + AVX512VL + AVX512F + + Permute double-precision FP values from two tables in xmm3/m128/m64bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1. + + + VPERMI2PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 77 /r + + AVX512VL + AVX512F + + Permute double-precision FP values from two tables in ymm3/m256/m64bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1. + + + VPERMI2PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 77 /r + + AVX512F + + Permute double-precision FP values from two tables in zmm3/m512/m64bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPERMT2B--Full Permute of Bytes From Two Tables Overwriting a Table. + + VPERMT2B + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.DDS.128.66.0F38.W0 7D /r + + AVX512VL + AVX512VBMI + + Permute bytes in xmm3/m128 and xmm1 using byte indexes in xmm2 and store the byte results in xmm1 using writemask k1. + + + VPERMT2B + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W0 7D /r + + AVX512VL + AVX512VBMI + + Permute bytes in ymm3/m256 and ymm1 using byte indexes in ymm2 and store the byte results in ymm1 using writemask k1. + + + VPERMT2B + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W0 7D /r + + AVX512VBMI + + Permute bytes in zmm3/m512 and zmm1 using byte indexes in zmm2 and store the byte results in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VPERMT2W/D/Q/PS/PD--Full Permute from Two Tables Overwriting one Table. 
+ + VPERMT2W + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.DDS.128.66.0F38.W1 7D /r + + AVX512VL + AVX512BW + + Permute word integers from two tables in xmm3/m128 and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMT2W + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.DDS.256.66.0F38.W1 7D /r + + AVX512VL + AVX512BW + + Permute word integers from two tables in ymm3/m256 and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMT2W + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.DDS.512.66.0F38.W1 7D /r + + AVX512BW + + Permute word integers from two tables in zmm3/m512 and zmm1 using indexes in zmm2 and store the result in zmm1 using writemask k1. + + + VPERMT2D + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 7E /r + + AVX512VL + AVX512F + + Permute double-words from two tables in xmm3/m128/m32bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMT2D + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 7E /r + + AVX512VL + AVX512F + + Permute double-words from two tables in ymm3/m256/m32bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMT2D + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.DDS.512.66.0F38.W0 7E /r + + AVX512F + + Permute double-words from two tables in zmm3/m512/m32bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1. + + + VPERMT2Q + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 7E /r + + AVX512VL + AVX512F + + Permute quad-words from two tables in xmm3/m128/m64bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMT2Q + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 7E /r + + AVX512VL + AVX512F + + Permute quad-words from two tables in ymm3/m256/m64bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMT2Q + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 7E /r + + AVX512F + + Permute quad-words from two tables in zmm3/m512/m64bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1. + + + VPERMT2PS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.DDS.128.66.0F38.W0 7F /r + + AVX512VL + AVX512F + + Permute single-precision FP values from two tables in xmm3/m128/m32bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMT2PS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.DDS.256.66.0F38.W0 7F /r + + AVX512VL + AVX512F + + Permute single-precision FP values from two tables in ymm3/m256/m32bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1. + + + VPERMT2PS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.DDS.512.66.0F38.W0 7F /r + + AVX512F + + Permute single-precision FP values from two tables in zmm3/m512/m32bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1. + + + VPERMT2PD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 7F /r + + AVX512VL + AVX512F + + Permute double-precision FP values from two tables in xmm3/m128/m64bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1. + + + VPERMT2PD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 7F /r + + AVX512VL + AVX512F + + Permute double-precision FP values from two tables in ymm3/m256/m64bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1. 
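Editor's note: the wording "overwriting the index" versus "overwriting a table" in the VPERMI2* and VPERMT2* entries above is the only difference between the two families; both read a 2N-entry table formed by two registers. A scalar reference model for the dword case, written as plain C for illustration only (element counts follow the EVEX.512 forms above):

    #include <stdint.h>

    /* Reference model of VPERMT2D zmm1, zmm2, zmm3: the destination (first
     * table) is overwritten, the index register survives.  Each index uses
     * 5 bits; bit 4 selects between the two tables. */
    static void vpermt2d_ref(uint32_t dst_a[16], const uint32_t idx[16],
                             const uint32_t table_b[16])
    {
        uint32_t out[16];
        for (int i = 0; i < 16; i++) {
            uint32_t sel = idx[i] & 0x1F;               /* 5 significant bits */
            out[i] = (sel & 0x10) ? table_b[sel & 0xF]  /* bit 4 set: second table */
                                  : dst_a[sel & 0xF];   /* bit 4 clear: first table */
        }
        for (int i = 0; i < 16; i++) dst_a[i] = out[i]; /* destination overwritten */
    }

VPERMI2D computes the same values but writes them over the index operand instead of the first table.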
+ + + VPERMT2PD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 7F /r + + AVX512F + + Permute double-precision FP values from two tables in zmm3/m512/m64bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPERMILPD--Permute In-Lane of Pairs of Double-Precision Floating-Point Values. + + VPERMILPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 0D /r + + AVX + + Permute double-precision floating-point values in xmm2 using controls from xmm3/m128 and store result in xmm1. + + + VPERMILPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 0D /r + + AVX + + Permute double-precision floating-point values in ymm2 using controls from ymm3/m256 and store result in ymm1. + + + VPERMILPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 0D /r + + AVX512VL + AVX512F + + Permute double-precision floating-point values in xmm2 using control from xmm3/m128/m64bcst and store the result in xmm1 using writemask k1. + + + VPERMILPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 0D /r + + AVX512VL + AVX512F + + Permute double-precision floating-point values in ymm2 using control from ymm3/m256/m64bcst and store the result in ymm1 using writemask k1. + + + VPERMILPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 0D /r + + AVX512F + + Permute double-precision floating-point values in zmm2 using control from zmm3/m512/m64bcst and store the result in zmm1 using writemask k1. + + + VPERMILPD + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.W0 05 /r ib + + AVX + + Permute double-precision floating-point values in xmm2/m128 using controls from imm8. + + + VPERMILPD + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W0 05 /r ib + + AVX + + Permute double-precision floating-point values in ymm2/m256 using controls from imm8. + + + VPERMILPD + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.128.66.0F3A.W1 05 /r ib + + AVX512VL + AVX512F + + Permute double-precision floating-point values in xmm2/m128/m64bcst using controls from imm8 and store the result in xmm1 using writemask k1. + + + VPERMILPD + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 05 /r ib + + AVX512VL + AVX512F + + Permute double-precision floating-point values in ymm2/m256/m64bcst using controls from imm8 and store the result in ymm1 using writemask k1. + + + VPERMILPD + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.512.66.0F3A.W1 05 /r ib + + AVX512F + + Permute double-precision floating-point values in zmm2/m512/m64bcst using controls from imm8 and store the result in zmm1 using writemask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPERMILPS--Permute In-Lane of Quadruples of Single-Precision Floating-Point Values. + + VPERMILPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 0C /r + + AVX + + Permute single-precision floating-point values in xmm2 using controls from xmm3/m128 and store result in xmm1. + + + VPERMILPS + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.W0 04 /r ib + + AVX + + Permute single-precision floating-point values in xmm2/m128 using controls from imm8 and store result in xmm1. + + + VPERMILPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 0C /r + + AVX + + Permute single-precision floating-point values in ymm2 using controls from ymm3/m256 and store result in ymm1. 
+ + + VPERMILPS + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W0 04 /r ib + + AVX + + Permute single-precision floating-point values in ymm2/m256 using controls from imm8 and store result in ymm1. + + + VPERMILPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 0C /r + + AVX512VL + AVX512F + + Permute single-precision floating-point values xmm2 using control from xmm3/m128/m32bcst and store the result in xmm1 using writemask k1. + + + VPERMILPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 0C /r + + AVX512VL + AVX512F + + Permute single-precision floating-point values ymm2 using control from ymm3/m256/m32bcst and store the result in ymm1 using writemask k1. + + + VPERMILPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 0C /r + + AVX512F + + Permute single-precision floating-point values zmm2 using control from zmm3/m512/m32bcst and store the result in zmm1 using writemask k1. + + + VPERMILPS + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F3A.W0 04 /r ib + + AVX512VL + AVX512F + + Permute single-precision floating-point values xmm2/m128/m32bcst using controls from imm8 and store the result in xmm1 using writemask k1. + + + VPERMILPS + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F3A.W0 04 /r ib + + AVX512VL + AVX512F + + Permute single-precision floating-point values ymm2/m256/m32bcst using controls from imm8 and store the result in ymm1 using writemask k1. + + + VPERMILPS + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.512.66.0F3A.W0 04 /r ib + + AVX512F + + Permute single-precision floating-point values zmm2/m512/m32bcst using controls from imm8 and store the result in zmm1 using writemask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPERMPD--Permute Double-Precision Floating-Point Elements. + + VPERMPD + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W1 01 /r ib + + AVX2 + + Permute double-precision floating-point elements in ymm2/m256 using indices in imm8 and store the result in ymm1. + + + VPERMPD + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 01 /r ib + + AVX512VL + AVX512F + + Permute double-precision floating-point elements in ymm2/m256/m64bcst using indexes in imm8 and store the result in ymm1 subject to writemask k1. + + + VPERMPD + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.512.66.0F3A.W1 01 /r ib + + AVX512F + + Permute double-precision floating-point elements in zmm2/m512/m64bcst using indices in imm8 and store the result in zmm1 subject to writemask k1. + + + VPERMPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 16 /r + + AVX512VL + AVX512F + + Permute double-precision floating-point elements in ymm3/m256/m64bcst using indexes in ymm2 and store the result in ymm1 subject to writemask k1. + + + VPERMPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 16 /r + + AVX512F + + Permute double-precision floating-point elements in zmm3/m512/m64bcst using indices in zmm2 and store the result in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPERMPS--Permute Single-Precision Floating-Point Elements. + + VPERMPS + ymm1,ymm2,ymm3/m256 + VEX.256.66.0F38.W0 16 /r + + AVX2 + + Permute single-precision floating-point elements in ymm3/m256 using indices in ymm2 and store the result in ymm1. 
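Editor's note: the VPERMPD entries above come in two flavours with unrelated encodings: an immediate-controlled form in the 0F3A map, where imm8 holds four 2-bit lane selectors, and a vector-index form in the 0F38 map, where a second register supplies per-element indices. A short illustration of the two flavours using the corresponding intrinsics (assumes <immintrin.h>, AVX2 for the VEX form and AVX-512F for the EVEX form; the function names are mine, not from the XML):

    #include <immintrin.h>

    /* VEX.256.66.0F3A.W1 01 /r ib -- VPERMPD ymm, ymm/m256, imm8.
     * imm8 = 0x1B (binary 00 01 10 11) reverses the four doubles. */
    static inline __m256d reverse_pd(__m256d a)
    {
        return _mm256_permute4x64_pd(a, 0x1B);
    }

    /* EVEX.NDS.512.66.0F38.W1 16 /r -- VPERMPD zmm, zmm, zmm/m512/m64bcst;
     * indices come from a register instead of the immediate. */
    static inline __m512d permute_pd_var(__m512i idx, __m512d a)
    {
        return _mm512_permutexvar_pd(idx, a);
    }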
+ + + VPERMPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 16 /r + + AVX512VL + AVX512F + + Permute single-precision floating-point elements in ymm3/m256/m32bcst using indexes in ymm2 and store the result in ymm1 subject to write mask k1. + + + VPERMPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 16 /r + + AVX512F + + Permute single-precision floating-point values in zmm3/m512/m32bcst using indices in zmm2 and store the result in zmm1 subject to write mask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPERMQ--Qwords Element Permutation. + + VPERMQ + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W1 00 /r ib + + AVX2 + + Permute qwords in ymm2/m256 using indices in imm8 and store the result in ymm1. + + + VPERMQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 00 /r ib + + AVX512VL + AVX512F + + Permute qwords in ymm2/m256/m64bcst using indexes in imm8 and store the result in ymm1. + + + VPERMQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.512.66.0F3A.W1 00 /r ib + + AVX512F + + Permute qwords in zmm2/m512/m64bcst using indices in imm8 and store the result in zmm1. + + + VPERMQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 36 /r + + AVX512VL + AVX512F + + Permute qwords in ymm3/m256/m64bcst using indexes in ymm2 and store the result in ymm1. + + + VPERMQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 36 /r + + AVX512F + + Permute qwords in zmm3/m512/m64bcst using indices in zmm2 and store the result in zmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPEXPANDD--Load Sparse Packed Doubleword Integer Values from Dense Memory / Register. + + VPEXPANDD + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F38.W0 89 /r + + AVX512VL + AVX512F + + Expand packed double-word integer values from xmm2/m128 to xmm1 using writemask k1. + + + VPEXPANDD + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F38.W0 89 /r + + AVX512VL + AVX512F + + Expand packed double-word integer values from ymm2/m256 to ymm1 using writemask k1. + + + VPEXPANDD + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F38.W0 89 /r + + AVX512F + + Expand packed double-word integer values from zmm2/m512 to zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPEXPANDQ--Load Sparse Packed Quadword Integer Values from Dense Memory / Register. + + VPEXPANDQ + xmm1 {k1}{z},xmm2/m128 + EVEX.128.66.0F38.W1 89 /r + + AVX512VL + AVX512F + + Expand packed quad-word integer values from xmm2/m128 to xmm1 using writemask k1. + + + VPEXPANDQ + ymm1 {k1}{z},ymm2/m256 + EVEX.256.66.0F38.W1 89 /r + + AVX512VL + AVX512F + + Expand packed quad-word integer values from ymm2/m256 to ymm1 using writemask k1. + + + VPEXPANDQ + zmm1 {k1}{z},zmm2/m512 + EVEX.512.66.0F38.W1 89 /r + + AVX512F + + Expand packed quad-word integer values from zmm2/m512 to zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PEXTRB/PEXTRW/PEXTRD/PEXTRQ--Extract Integer. + + PEXTRB + reg/m8,xmm2,imm8 + 66 0F 3A 14 /r ib + + SSE4_1 + + Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r64/r32 is filled with zeros. + + + PEXTRW + reg,xmm1,imm8 + 66 0F C5 /r ib + + SSE2 + + Extract the word specified by imm8 from xmm1 and move it to reg, bits 15:0. The upper bits of r64/r32 is filled with zeros. 
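Editor's note: VPEXPANDD above is the inverse of the VPCOMPRESSD entry earlier in this file: compress packs the mask-selected elements contiguously toward the low end, expand scatters contiguous elements back out to the mask-selected positions. A common stream-compaction idiom built from the pair, sketched with intrinsics (assumes <immintrin.h>, an AVX-512F target, and the GCC/Clang __builtin_popcount builtin; illustration only):

    #include <immintrin.h>
    #include <stdint.h>

    /* Keep only the elements of 'v' selected by 'keep', packed to the front
     * of 'dst'; returns how many were written.  This is VPCOMPRESSD with a
     * memory destination (EVEX.512.66.0F38.W0 8B /r). */
    static inline int compress_store_u32(uint32_t *dst, __mmask16 keep, __m512i v)
    {
        _mm512_mask_compressstoreu_epi32(dst, keep, v);
        return __builtin_popcount((unsigned)keep);
    }

    /* Reverse direction: VPEXPANDD places consecutive source elements at the
     * positions selected by 'keep', zeroing the rest ({z} form). */
    static inline __m512i expand_u32(__mmask16 keep, __m512i packed)
    {
        return _mm512_maskz_expand_epi32(keep, packed);
    }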
+ + + PEXTRW + reg/m16,xmm2,imm8 + 66 0F 3A 15 /r ib + + SSE4_1 + + Extract a word integer value from xmm2 at the source word offset specified by imm8 into reg or m16. The upper bits of r64/r32 is filled with zeros. + + + PEXTRD + r32/m32,xmm2,imm8 + 66 0F 3A 16 /r ib + + SSE4_1 + + Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r32/m32. + + + PEXTRQ + r64/m64,xmm2,imm8 + 66 REX.W 0F 3A 16 /r ib + + SSE4_1 + + Extract a qword integer value from xmm2 at the source dword offset specified by imm8 into r64/m64. + + + VPEXTRB + reg/m8,xmm2,imm8 + VEX.128.66.0F3A 14 /r ib + + AVX + + Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRW + reg,xmm1,imm8 + VEX.128.66.0F C5 /r ib + + AVX + + Extract the word specified by imm8 from xmm1 and move it to reg, bits 15:0. Zero-extend the result. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRW + reg/m16,xmm2,imm8 + VEX.128.66.0F3A 15 /r ib + + AVX + + Extract a word integer value from xmm2 at the source word offset specified by imm8 into reg or m16. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRD + r32/m32,xmm2,imm8 + VEX.128.66.0F3A.W0 16 /r ib + + AVX + + Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r32/m32. + + + VPEXTRQ + r64/m64,xmm2,imm8 + VEX.128.66.0F3A.W1 16 /r ib + + AVX + + Extract a qword integer value from xmm2 at the source dword offset specified by imm8 into r64/m64. + + + VPEXTRB + reg/m8,xmm2,imm8 + EVEX.128.66.0F3A.WIG 14 /r ib + + AVX512BW + + Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRW + reg,xmm1,imm8 + EVEX.128.66.0F.WIG C5 /r ib + + AVX512BW + + Extract the word specified by imm8 from xmm1 and move it to reg, bits 15:0. Zero-extend the result. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRW + reg/m16,xmm2,imm8 + EVEX.128.66.0F3A.WIG 15 /r ib + + AVX512BW + + Extract a word integer value from xmm2 at the source word offset specified by imm8 into reg or m16. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRD + r32/m32,xmm2,imm8 + EVEX.128.66.0F3A.W0 16 /r ib + + AVX512DQ + + Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r32/m32. + + + VPEXTRQ + r64/m64,xmm2,imm8 + EVEX.128.66.0F3A.W1 16 /r ib + + AVX512DQ + + Extract a qword integer value from xmm2 at the source dword offset specified by imm8 into r64/m64. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + VPLZCNTD/Q--Count the Number of Leading Zero Bits for Packed Dword, Packed Qword Values. + + VPLZCNTD + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 44 /r + + AVX512VL + AVX512CD + + Count the number of leading zero bits in each dword element of xmm2/m128/m32bcst using writemask k1. + + + VPLZCNTD + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 44 /r + + AVX512VL + AVX512CD + + Count the number of leading zero bits in each dword element of ymm2/m256/m32bcst using writemask k1. + + + VPLZCNTD + zmm1 {k1}{z},zmm2/m512/m32bcst + EVEX.512.66.0F38.W0 44 /r + + AVX512CD + + Count the number of leading zero bits in each dword element of zmm2/m512/m32bcst using writemask k1. 
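Editor's note: the VPLZCNTD entries above are a per-element version of scalar LZCNT: each dword is replaced by the count of its leading zero bits, an all-zero element producing 32. A minimal sketch (assumes <immintrin.h> and AVX-512CD, the CPUID feature listed above; illustration only):

    #include <immintrin.h>
    #include <stdint.h>

    /* EVEX.512.66.0F38.W0 44 /r -- VPLZCNTD zmm {k1}{z}, zmm/m512/m32bcst */
    static inline __m512i lzcnt32x16(__m512i v)
    {
        return _mm512_lzcnt_epi32(v);
    }

    /* Scalar reference for one element; lzcnt(0) is defined as 32. */
    static inline uint32_t lzcnt32_ref(uint32_t x)
    {
        uint32_t n = 0;
        while (n < 32 && !(x & 0x80000000u)) { x <<= 1; n++; }
        return n;
    }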
+ + + VPLZCNTQ + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 44 /r + + AVX512VL + AVX512CD + + Count the number of leading zero bits in each qword element of xmm2/m128/m64bcst using writemask k1. + + + VPLZCNTQ + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 44 /r + + AVX512VL + AVX512CD + + Count the number of leading zero bits in each qword element of ymm2/m256/m64bcst using writemask k1. + + + VPLZCNTQ + zmm1 {k1}{z},zmm2/m512/m64bcst + EVEX.512.66.0F38.W1 44 /r + + AVX512CD + + Count the number of leading zero bits in each qword element of zmm2/m512/m64bcst using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PMADDUBSW--Multiply and Add Packed Integers. + + PMADDUBSW + xmm1,xmm2/m128 + 66 0F 38 04 /r + + SSSE3 + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1. + + + VPMADDUBSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 04 /r + + AVX + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1. + + + VPMADDUBSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 04 /r + + AVX2 + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1. + + + VPMADDUBSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 04 /r + + AVX512VL + AVX512BW + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1 under writemask k1. + + + VPMADDUBSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 04 /r + + AVX512VL + AVX512BW + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to ymm1 under writemask k1. + + + VPMADDUBSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 04 /r + + AVX512BW + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMADDWD--Multiply and Add Packed Integers. + + PMADDWD + xmm1,xmm2/m128 + 66 0F F5 /r + + SSE2 + + Multiply the packed word integers in xmm1 by the packed word integers in xmm2/m128, add adjacent doubleword results, and store in xmm1. + + + VPMADDWD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F F5 /r + + AVX + + Multiply the packed word integers in xmm2 by the packed word integers in xmm3/m128, add adjacent doubleword results, and store in xmm1. + + + VPMADDWD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F F5 /r + + AVX2 + + Multiply the packed word integers in ymm2 by the packed word integers in ymm3/m256, add adjacent doubleword results, and store in ymm1. + + + VPMADDWD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG F5 /r + + AVX512VL + AVX512BW + + Multiply the packed word integers in xmm2 by the packed word integers in xmm3/m128, add adjacent doubleword results, and store in xmm1 under writemask k1. + + + VPMADDWD + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG F5 /r + + AVX512VL + AVX512BW + + Multiply the packed word integers in ymm2 by the packed word integers in ymm3/m256, add adjacent doubleword results, and store in ymm1 under writemask k1. + + + VPMADDWD + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG F5 /r + + AVX512BW + + Multiply the packed word integers in zmm2 by the packed word integers in zmm3/m512, add adjacent doubleword results, and store in zmm1 under writemask k1. 
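Editor's note: the PMADDWD/VPMADDWD descriptions above ("add adjacent doubleword results") are easier to see as a formula: each 32-bit result lane is the sum of two signed 16x16-bit products taken from one adjacent pair of word positions. A scalar reference model for one 128-bit register (plain C, illustration only):

    #include <stdint.h>

    /* Reference model of PMADDWD xmm1, xmm2/m128 (66 0F F5 /r):
     * dst dword i = a[2i]*b[2i] + a[2i+1]*b[2i+1], all inputs signed 16-bit. */
    static void pmaddwd_ref(int32_t dst[4], const int16_t a[8], const int16_t b[8])
    {
        for (int i = 0; i < 4; i++) {
            int64_t s = (int64_t)a[2*i] * b[2*i] + (int64_t)a[2*i + 1] * b[2*i + 1];
            dst[i] = (int32_t)s;  /* only the all-0x8000 corner case exceeds
                                     int32 range; hardware keeps 0x80000000 */
        }
    }

PMADDUBSW is the byte-granularity analogue: unsigned bytes from the first source times signed bytes from the second, with the adjacent-pair sums saturated to signed words.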
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PINSRB/PINSRW/PINSRD/PINSRQ--Insert Integer. + + PINSRB + xmm1,r32/m8,imm8 + 66 0F 3A 20 /r ib + + SSE4_1 + + Insert a byte integer value from r32/m8 into xmm1 at the byte offset in imm8. + + + PINSRW + xmm1,r32/m16,imm8 + 66 0F C4 /r ib + + SSE2 + + Insert a word integer value from r32/m16 into xmm1 at the word offset in imm8. + + + PINSRD + xmm1,r32/m32,imm8 + 66 0F 3A 22 /r ib + + SSE4_1 + + Insert a dword integer value from r32/m32 into xmm1 at the dword offset in imm8. + + + PINSRQ + xmm1,r64/m64,imm8 + 66 REX.W 0F 3A 22 /r ib + + SSE4_1 + + Insert a qword integer value from r64/m64 into xmm1 at the qword offset in imm8. + + + VPINSRB + xmm1,xmm2,r32/m8,imm8 + VEX.NDS.128.66.0F3A 20 /r ib + + AVX + + Merge a byte integer value from r32/m8 and rest from xmm2 into xmm1 at the byte offset in imm8. + + + VPINSRW + xmm1,xmm2,r32/m16,imm8 + VEX.NDS.128.66.0F C4 /r ib + + AVX + + Insert a word integer value from r32/m16 and rest from xmm2 into xmm1 at the word offset in imm8. + + + VPINSRD + xmm1,xmm2,r32/m32,imm8 + VEX.NDS.128.66.0F3A.W0 22 /r ib + + AVX + + Insert a dword integer value from r32/m32 and rest from xmm2 into xmm1 at the dword offset in imm8. + + + VPINSRQ + xmm1,xmm2,r64/m64,imm8 + VEX.NDS.128.66.0F3A.W1 22 /r ib + + AVX + + Insert a qword integer value from r64/m64 and rest from xmm2 into xmm1 at the qword offset in imm8. + + + VPINSRB + xmm1,xmm2,r32/m8,imm8 + EVEX.NDS.128.66.0F3A.WIG 20 /r ib + + AVX512BW + + Merge a byte integer value from r32/m8 and rest from xmm2 into xmm1 at the byte offset in imm8. + + + VPINSRW + xmm1,xmm2,r32/m16,imm8 + EVEX.NDS.128.66.0F.WIG C4 /r ib + + AVX512BW + + Insert a word integer value from r32/m16 and rest from xmm2 into xmm1 at the word offset in imm8. + + + VPINSRD + xmm1,xmm2,r32/m32,imm8 + EVEX.NDS.128.66.0F3A.W0 22 /r ib + + AVX512DQ + + Insert a dword integer value from r32/m32 and rest from xmm2 into xmm1 at the dword offset in imm8. + + + VPINSRQ + xmm1,xmm2,r64/m64,imm8 + EVEX.NDS.128.66.0F3A.W1 22 /r ib + + AVX512DQ + + Insert a qword integer value from r64/m64 and rest from xmm2 into xmm1 at the qword offset in imm8. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VPMADD52LUQ--Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit Products to Qword Accumulators. + + VPMADD52LUQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 B4 /r + + AVX512IFMA + AVX512VL + + Multiply unsigned 52-bit integers in xmm2 and xmm3/m128 and add the low 52 bits of the 104-bit product to the qword unsigned integers in xmm1 using writemask k1. + + + VPMADD52LUQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 B4 /r + + AVX512IFMA + AVX512VL + + Multiply unsigned 52-bit integers in ymm2 and ymm3/m128 and add the low 52 bits of the 104-bit product to the qword unsigned integers in ymm1 using writemask k1. + + + VPMADD52LUQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 B4 /r + + AVX512IFMA + + Multiply unsigned 52-bit integers in zmm2 and zmm3/m128 and add the low 52 bits of the 104-bit product to the qword unsigned integers in zmm1 using writemask k1. 
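Editor's note: VPMADD52LUQ above (and VPMADD52HUQ in the next entry) operate on 52-bit unsigned fields packed in 64-bit lanes, which is what makes them useful for multi-word arithmetic: a 52x52-bit product is formed and either its low or its high 52 bits are added to the full 64-bit accumulator. A scalar reference model for one lane (uses the GCC/Clang unsigned __int128 extension; illustration only, not taken from the XML):

    #include <stdint.h>

    #define MASK52 ((1ULL << 52) - 1)

    /* One lane of VPMADD52LUQ: acc += bits 51:0 of (a[51:0] * b[51:0]). */
    static inline uint64_t madd52lo_ref(uint64_t acc, uint64_t a, uint64_t b)
    {
        unsigned __int128 p = (unsigned __int128)(a & MASK52) * (b & MASK52);
        return acc + (uint64_t)(p & MASK52);
    }

    /* One lane of VPMADD52HUQ: acc += bits 103:52 of the same product. */
    static inline uint64_t madd52hi_ref(uint64_t acc, uint64_t a, uint64_t b)
    {
        unsigned __int128 p = (unsigned __int128)(a & MASK52) * (b & MASK52);
        return acc + (uint64_t)(p >> 52);
    }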
+ + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPMADD52HUQ--Packed Multiply of Unsigned 52-bit Unsigned Integers and Add High 52-bit Products to 64-bit Accumulators'. + + VPMADD52HUQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.DDS.128.66.0F38.W1 B5 /r + + AVX512IFMA + AVX512VL + + Multiply unsigned 52-bit integers in xmm2 and xmm3/m128 and add the high 52 bits of the 104bit product to the qword unsigned integers in xmm1 using writemask k1. + + + VPMADD52HUQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.DDS.256.66.0F38.W1 B5 /r + + AVX512IFMA + AVX512VL + + Multiply unsigned 52-bit integers in ymm2 and ymm3/m128 and add the high 52 bits of the 104bit product to the qword unsigned integers in ymm1 using writemask k1. + + + VPMADD52HUQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.DDS.512.66.0F38.W1 B5 /r + + AVX512IFMA + + Multiply unsigned 52-bit integers in zmm2 and zmm3/m128 and add the high 52 bits of the 104bit product to the qword unsigned integers in zmm1 using writemask k1. + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXSB/PMAXSW/PMAXSD/PMAXSQ--Maximum of Packed Signed Integers. + + PMAXSB + xmm1,xmm2/m128 + 66 0F 38 3C /r + + SSE4_1 + + Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + PMAXSW + xmm1,xmm2/m128 + 66 0F EE /r + + SSE2 + + Compare packed signed word integers in xmm2/m128 and xmm1 and stores maximum packed values in xmm1. + + + PMAXSD + xmm1,xmm2/m128 + 66 0F 38 3D /r + + SSE4_1 + + Compare packed signed dword integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + VPMAXSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3C /r + + AVX + + Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EE /r + + AVX + + Compare packed signed word integers in xmm3/m128 and xmm2 and store packed maximum values in xmm1. + + + VPMAXSD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3D /r + + AVX + + Compare packed signed dword integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3C /r + + AVX2 + + Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1. + + + VPMAXSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EE /r + + AVX2 + + Compare packed signed word integers in ymm3/m256 and ymm2 and store packed maximum values in ymm1. + + + VPMAXSD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3D /r + + AVX2 + + Compare packed signed dword integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1. + + + VPMAXSB + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 3C /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1. + + + VPMAXSB + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 3C /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1. + + + VPMAXSB + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 3C /r + + AVX512BW + + Compare packed signed byte integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1. 
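Editor's note: almost every EVEX form in this file carries the "{k1}{z}" decoration and the phrase "under writemask k1"; the VPMAXSB entries above are typical. In intrinsic terms the two cases are merging (lanes with a 0 mask bit keep their previous destination value) and zeroing (lanes with a 0 mask bit become zero). A small sketch of the distinction (assumes <immintrin.h> and AVX-512BW, built with e.g. -mavx512bw; the function names are mine):

    #include <immintrin.h>

    /* VPMAXSB zmm1 {k1}, zmm2, zmm3/m512 -- merging form: lanes with a 0 bit
     * in k keep whatever was already in 'old'. */
    static inline __m512i max_i8_merge(__m512i old, __mmask64 k, __m512i a, __m512i b)
    {
        return _mm512_mask_max_epi8(old, k, a, b);
    }

    /* VPMAXSB zmm1 {k1}{z}, zmm2, zmm3/m512 -- zeroing form: lanes with a 0
     * bit in k are forced to zero. */
    static inline __m512i max_i8_zero(__mmask64 k, __m512i a, __m512i b)
    {
        return _mm512_maskz_max_epi8(k, a, b);
    }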
+ + + VPMAXSW + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG EE /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1. + + + VPMAXSW + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG EE /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1. + + + VPMAXSW + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG EE /r + + AVX512BW + + Compare packed signed word integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1. + + + VPMAXSD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 3D /r + + AVX512VL + AVX512F + + Compare packed signed dword integers in xmm2 and xmm3/m128/m32bcst and store packed maximum values in xmm1 using writemask k1. + + + VPMAXSD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 3D /r + + AVX512VL + AVX512F + + Compare packed signed dword integers in ymm2 and ymm3/m256/m32bcst and store packed maximum values in ymm1 using writemask k1. + + + VPMAXSD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 3D /r + + AVX512F + + Compare packed signed dword integers in zmm2 and zmm3/m512/m32bcst and store packed maximum values in zmm1 using writemask k1. + + + VPMAXSQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 3D /r + + AVX512VL + AVX512F + + Compare packed signed qword integers in xmm2 and xmm3/m128/m64bcst and store packed maximum values in xmm1 using writemask k1. + + + VPMAXSQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 3D /r + + AVX512VL + AVX512F + + Compare packed signed qword integers in ymm2 and ymm3/m256/m64bcst and store packed maximum values in ymm1 using writemask k1. + + + VPMAXSQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 3D /r + + AVX512F + + Compare packed signed qword integers in zmm2 and zmm3/m512/m64bcst and store packed maximum values in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXUB/PMAXUW--Maximum of Packed Unsigned Integers. + + PMAXUB + xmm1,xmm2/m128 + 66 0F DE /r + + SSE2 + + Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + PMAXUW + xmm1,xmm2/m128 + 66 0F 38 3E/r + + SSE4_1 + + Compare packed unsigned word integers in xmm2/m128 and xmm1 and stores maximum packed values in xmm1. + + + VPMAXUB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F DE /r + + AVX + + Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXUW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 3E/r + + AVX + + Compare packed unsigned word integers in xmm3/m128 and xmm2 and store maximum packed values in xmm1. + + + VPMAXUB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F DE /r + + AVX2 + + Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1. + + + VPMAXUW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 3E/r + + AVX2 + + Compare packed unsigned word integers in ymm3/m256 and ymm2 and store maximum packed values in ymm1. 
+ + + VPMAXUB + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG DE /r + + AVX512VL + AVX512BW + + Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1. + + + VPMAXUB + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG DE /r + + AVX512VL + AVX512BW + + Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1. + + + VPMAXUB + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG DE /r + + AVX512BW + + Compare packed unsigned byte integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1. + + + VPMAXUW + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 3E /r + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1. + + + VPMAXUW + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 3E /r + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1. + + + VPMAXUW + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 3E /r + + AVX512BW + + Compare packed unsigned word integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXUD/PMAXUQ--Maximum of Packed Unsigned Integers. + + PMAXUD + xmm1,xmm2/m128 + 66 0F 38 3F /r + + SSE4_1 + + Compare packed unsigned dword integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + VPMAXUD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3F /r + + AVX + + Compare packed unsigned dword integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXUD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3F /r + + AVX2 + + Compare packed unsigned dword integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1. + + + VPMAXUD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 3F /r + + AVX512VL + AVX512F + + Compare packed unsigned dword integers in xmm2 and xmm3/m128/m32bcst and store packed maximum values in xmm1 under writemask k1. + + + VPMAXUD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 3F /r + + AVX512VL + AVX512F + + Compare packed unsigned dword integers in ymm2 and ymm3/m256/m32bcst and store packed maximum values in ymm1 under writemask k1. + + + VPMAXUD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 3F /r + + AVX512F + + Compare packed unsigned dword integers in zmm2 and zmm3/m512/m32bcst and store packed maximum values in zmm1 under writemask k1. + + + VPMAXUQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 3F /r + + AVX512VL + AVX512F + + Compare packed unsigned qword integers in xmm2 and xmm3/m128/m64bcst and store packed maximum values in xmm1 under writemask k1. + + + VPMAXUQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 3F /r + + AVX512VL + AVX512F + + Compare packed unsigned qword integers in ymm2 and ymm3/m256/m64bcst and store packed maximum values in ymm1 under writemask k1. + + + VPMAXUQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 3F /r + + AVX512F + + Compare packed unsigned qword integers in zmm2 and zmm3/m512/m64bcst and store packed maximum values in zmm1 under writemask k1. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + PMINSB/PMINSW--Minimum of Packed Signed Integers. + + PMINSB + xmm1,xmm2/m128 + 66 0F 38 38 /r + + SSE4_1 + + Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + PMINSW + xmm1,xmm2/m128 + 66 0F EA /r + + SSE2 + + Compare packed signed word integers in xmm2/m128 and xmm1 and store packed minimum values in xmm1. + + + VPMINSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 38 /r + + AVX + + Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F EA /r + + AVX + + Compare packed signed word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1. + + + VPMINSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 38 /r + + AVX2 + + Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1. + + + VPMINSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F EA /r + + AVX2 + + Compare packed signed word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1. + + + VPMINSB + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 38 /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1. + + + VPMINSB + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 38 /r + + AVX512VL + AVX512BW + + Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1. + + + VPMINSB + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 38 /r + + AVX512BW + + Compare packed signed byte integers in zmm2 and zmm3/m512 and store packed minimum values in zmm1 under writemask k1. + + + VPMINSW + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG EA /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1. + + + VPMINSW + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG EA /r + + AVX512VL + AVX512BW + + Compare packed signed word integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1. + + + VPMINSW + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG EA /r + + AVX512BW + + Compare packed signed word integers in zmm2 and zmm3/m512 and store packed minimum values in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINSD/PMINSQ--Minimum of Packed Signed Integers. + + PMINSD + xmm1,xmm2/m128 + 66 0F 38 39 /r + + SSE4_1 + + Compare packed signed dword integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + VPMINSD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 39 /r + + AVX + + Compare packed signed dword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINSD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 39 /r + + AVX2 + + Compare packed signed dword integers in ymm2 and ymm3/m128 and store packed minimum values in ymm1. + + + VPMINSD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 39 /r + + AVX512VL + AVX512F + + Compare packed signed dword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1. 
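Editor's note: the "m32bcst"/"m64bcst" alternatives in the EVEX memory operands above (e.g. VPMINSD xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst) denote EVEX embedded broadcast: a single 32- or 64-bit memory element is replicated to every lane before the operation. From C this usually falls out of combining a vector intrinsic with a splat of a scalar load; a hedged sketch (assumes <immintrin.h>, AVX-512F, and that the compiler chooses the broadcast form, which it may but is not required to do):

    #include <immintrin.h>
    #include <stdint.h>

    /* Clamp every dword in 'v' to at most *limit.  With optimization this
     * can compile to a single instruction of the form
     *     vpminsd zmm0, zmm1, dword ptr [rdi]{1to16}
     * i.e. VPMINSD with the m32bcst operand form. */
    static inline __m512i clamp_to_limit(__m512i v, const int32_t *limit)
    {
        return _mm512_min_epi32(v, _mm512_set1_epi32(*limit));
    }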
+ + + VPMINSD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 39 /r + + AVX512VL + AVX512F + + Compare packed signed dword integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1. + + + VPMINSD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 39 /r + + AVX512F + + Compare packed signed dword integers in zmm2 and zmm3/m512/m32bcst and store packed minimum values in zmm1 under writemask k1. + + + VPMINSQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 39 /r + + AVX512VL + AVX512F + + Compare packed signed qword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1. + + + VPMINSQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 39 /r + + AVX512VL + AVX512F + + Compare packed signed qword integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1. + + + VPMINSQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 39 /r + + AVX512F + + Compare packed signed qword integers in zmm2 and zmm3/m512/m64bcst and store packed minimum values in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINUB/PMINUW--Minimum of Packed Unsigned Integers. + + PMINUB + xmm1,xmm2/m128 + 66 0F DA /r + + SSE2 + + Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + PMINUW + xmm1,xmm2/m128 + 66 0F 38 3A/r + + SSE4_1 + + Compare packed unsigned word integers in xmm2/m128 and xmm1 and store packed minimum values in xmm1. + + + VPMINUB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F DA /r + + AVX + + Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINUW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 3A/r + + AVX + + Compare packed unsigned word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1. + + + VPMINUB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F DA /r + + AVX2 + + Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1. + + + VPMINUW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 3A/r + + AVX2 + + Compare packed unsigned word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1. + + + VPMINUB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F DA /r + + AVX512VL + AVX512BW + + Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1. + + + VPMINUB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F DA /r + + AVX512VL + AVX512BW + + Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1. + + + VPMINUB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F DA /r + + AVX512BW + + Compare packed unsigned byte integers in zmm2 and zmm3/m512 and store packed minimum values in zmm1 under writemask k1. + + + VPMINUW + xmm1{k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38 3A/r + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1 under writemask k1. + + + VPMINUW + ymm1{k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38 3A/r + + AVX512VL + AVX512BW + + Compare packed unsigned word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1 under writemask k1. 
+ + + VPMINUW + zmm1{k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38 3A/r + + AVX512BW + + Compare packed unsigned word integers in zmm3/m512 and zmm2 and return packed minimum values in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINUD/PMINUQ--Minimum of Packed Unsigned Integers. + + PMINUD + xmm1,xmm2/m128 + 66 0F 38 3B /r + + SSE4_1 + + Compare packed unsigned dword integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + VPMINUD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3B /r + + AVX + + Compare packed unsigned dword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINUD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3B /r + + AVX2 + + Compare packed unsigned dword integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1. + + + VPMINUD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 3B /r + + AVX512VL + AVX512F + + Compare packed unsigned dword integers in xmm2 and xmm3/m128/m32bcst and store packed minimum values in xmm1 under writemask k1. + + + VPMINUD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 3B /r + + AVX512VL + AVX512F + + Compare packed unsigned dword integers in ymm2 and ymm3/m256/m32bcst and store packed minimum values in ymm1 under writemask k1. + + + VPMINUD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 3B /r + + AVX512F + + Compare packed unsigned dword integers in zmm2 and zmm3/m512/m32bcst and store packed minimum values in zmm1 under writemask k1. + + + VPMINUQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 3B /r + + AVX512VL + AVX512F + + Compare packed unsigned qword integers in xmm2 and xmm3/m128/m64bcst and store packed minimum values in xmm1 under writemask k1. + + + VPMINUQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 3B /r + + AVX512VL + AVX512F + + Compare packed unsigned qword integers in ymm2 and ymm3/m256/m64bcst and store packed minimum values in ymm1 under writemask k1. + + + VPMINUQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 3B /r + + AVX512F + + Compare packed unsigned qword integers in zmm2 and zmm3/m512/m64bcst and store packed minimum values in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPMOVM2B/VPMOVM2W/VPMOVM2D/VPMOVM2Q--Convert a Mask Register to a Vector Register. + + VPMOVM2B + xmm1,k1 + EVEX.128.F3.0F38.W0 28 /r + + AVX512VL + AVX512BW + + Sets each byte in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2B + ymm1,k1 + EVEX.256.F3.0F38.W0 28 /r + + AVX512VL + AVX512BW + + Sets each byte in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2B + zmm1,k1 + EVEX.512.F3.0F38.W0 28 /r + + AVX512BW + + Sets each byte in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2W + xmm1,k1 + EVEX.128.F3.0F38.W1 28 /r + + AVX512VL + AVX512BW + + Sets each word in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2W + ymm1,k1 + EVEX.256.F3.0F38.W1 28 /r + + AVX512VL + AVX512BW + + Sets each word in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. 
+ + + VPMOVM2W + zmm1,k1 + EVEX.512.F3.0F38.W1 28 /r + + AVX512BW + + Sets each word in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2D + xmm1,k1 + EVEX.128.F3.0F38.W0 38 /r + + AVX512VL + AVX512DQ + + Sets each doubleword in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2D + ymm1,k1 + EVEX.256.F3.0F38.W0 38 /r + + AVX512VL + AVX512DQ + + Sets each doubleword in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2D + zmm1,k1 + EVEX.512.F3.0F38.W0 38 /r + + AVX512DQ + + Sets each doubleword in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2Q + xmm1,k1 + EVEX.128.F3.0F38.W1 38 /r + + AVX512VL + AVX512DQ + + Sets each quadword in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2Q + ymm1,k1 + EVEX.256.F3.0F38.W1 38 /r + + AVX512VL + AVX512DQ + + Sets each quadword in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + VPMOVM2Q + zmm1,k1 + EVEX.512.F3.0F38.W1 38 /r + + AVX512DQ + + Sets each quadword in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPMOVB2M/VPMOVW2M/VPMOVD2M/VPMOVQ2M--Convert a Vector Register to a Mask. + + VPMOVB2M + k1,xmm1 + EVEX.128.F3.0F38.W0 29 /r + + AVX512VL + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding byte in XMM1. + + + VPMOVB2M + k1,ymm1 + EVEX.256.F3.0F38.W0 29 /r + + AVX512VL + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding byte in YMM1. + + + VPMOVB2M + k1,zmm1 + EVEX.512.F3.0F38.W0 29 /r + + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding byte in ZMM1. + + + VPMOVW2M + k1,xmm1 + EVEX.128.F3.0F38.W1 29 /r + + AVX512VL + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding word in XMM1. + + + VPMOVW2M + k1,ymm1 + EVEX.256.F3.0F38.W1 29 /r + + AVX512VL + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding word in YMM1. + + + VPMOVW2M + k1,zmm1 + EVEX.512.F3.0F38.W1 29 /r + + AVX512BW + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding word in ZMM1. + + + VPMOVD2M + k1,xmm1 + EVEX.128.F3.0F38.W0 39 /r + + AVX512VL + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding doubleword in XMM1. + + + VPMOVD2M + k1,ymm1 + EVEX.256.F3.0F38.W0 39 /r + + AVX512VL + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding doubleword in YMM1. + + + VPMOVD2M + k1,zmm1 + EVEX.512.F3.0F38.W0 39 /r + + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding doubleword in ZMM1. + + + VPMOVQ2M + k1,xmm1 + EVEX.128.F3.0F38.W1 39 /r + + AVX512VL + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding quadword in XMM1. + + + VPMOVQ2M + k1,ymm1 + EVEX.256.F3.0F38.W1 39 /r + + AVX512VL + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding quadword in YMM1. 
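Editor's note: VPMOVM2B and VPMOVB2M above are a matched pair: one expands a mask register into a vector of all-ones/all-zeros elements, the other collapses a vector back to a mask by taking each element's sign bit, so a round trip through the pair is lossless. A sketch with the corresponding intrinsics (assumes <immintrin.h> and AVX-512BW; illustration only):

    #include <immintrin.h>

    /* VPMOVM2B zmm, k (EVEX.512.F3.0F38.W0 28 /r):
     * bit i of k -> byte i = 0xFF or 0x00. */
    static inline __m512i mask_to_bytes(__mmask64 k)
    {
        return _mm512_movm_epi8(k);
    }

    /* VPMOVB2M k, zmm (EVEX.512.F3.0F38.W0 29 /r):
     * sign bit of byte i -> bit i of k. */
    static inline __mmask64 bytes_to_mask(__m512i v)
    {
        return _mm512_movepi8_mask(v);
    }

    /* bytes_to_mask(mask_to_bytes(k)) == k for any k. */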
+ + + VPMOVQ2M + k1,zmm1 + EVEX.512.F3.0F38.W1 39 /r + + AVX512DQ + + Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding quadword in ZMM1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPMOVQB/VPMOVSQB/VPMOVUSQB--Down Convert QWord to Byte. + + VPMOVQB + xmm1/m16 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 32 /r + + AVX512VL + AVX512F + + Converts 2 packed quad-word integers from xmm2 into 2 packed byte integers in xmm1/m16 with truncation under writemask k1. + + + VPMOVSQB + xmm1/m16 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 22 /r + + AVX512VL + AVX512F + + Converts 2 packed signed quad-word integers from xmm2 into 2 packed signed byte integers in xmm1/m16 using signed saturation under writemask k1. + + + VPMOVUSQB + xmm1/m16 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 12 /r + + AVX512VL + AVX512F + + Converts 2 packed unsigned quad-word integers from xmm2 into 2 packed unsigned byte integers in xmm1/m16 using unsigned saturation under writemask k1. + + + VPMOVQB + xmm1/m32 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 32 /r + + AVX512VL + AVX512F + + Converts 4 packed quad-word integers from ymm2 into 4 packed byte integers in xmm1/m32 with truncation under writemask k1. + + + VPMOVSQB + xmm1/m32 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 22 /r + + AVX512VL + AVX512F + + Converts 4 packed signed quad-word integers from ymm2 into 4 packed signed byte integers in xmm1/m32 using signed saturation under writemask k1. + + + VPMOVUSQB + xmm1/m32 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 12 /r + + AVX512VL + AVX512F + + Converts 4 packed unsigned quad-word integers from ymm2 into 4 packed unsigned byte integers in xmm1/m32 using unsigned saturation under writemask k1. + + + VPMOVQB + xmm1/m64 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 32 /r + + AVX512F + + Converts 8 packed quad-word integers from zmm2 into 8 packed byte integers in xmm1/m64 with truncation under writemask k1. + + + VPMOVSQB + xmm1/m64 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 22 /r + + AVX512F + + Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed byte integers in xmm1/m64 using signed saturation under writemask k1. + + + VPMOVUSQB + xmm1/m64 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 12 /r + + AVX512F + + Converts 8 packed unsigned quad-word integers from zmm2 into 8 packed unsigned byte integers in xmm1/m64 using unsigned saturation under writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPMOVQW/VPMOVSQW/VPMOVUSQW--Down Convert QWord to Word. + + VPMOVQW + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 34 /r + + AVX512VL + AVX512F + + Converts 2 packed quad-word integers from xmm2 into 2 packed word integers in xmm1/m32 with truncation under writemask k1. + + + VPMOVSQW + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 24 /r + + AVX512VL + AVX512F + + Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed word integers in xmm1/m32 using signed saturation under writemask k1. + + + VPMOVUSQW + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 14 /r + + AVX512VL + AVX512F + + Converts 2 packed unsigned quad-word integers from xmm2 into 2 packed unsigned word integers in xmm1/m32 using unsigned saturation under writemask k1. + + + VPMOVQW + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 34 /r + + AVX512VL + AVX512F + + Converts 4 packed quad-word integers from ymm2 into 4 packed word integers in xmm1/m64 with truncation under writemask k1. 
+ + + VPMOVSQW + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 24 /r + + AVX512VL + AVX512F + + Converts 4 packed signed quad-word integers from ymm2 into 4 packed signed word integers in xmm1/m64 using signed saturation under writemask k1. + + + VPMOVUSQW + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 14 /r + + AVX512VL + AVX512F + + Converts 4 packed unsigned quad-word integers from ymm2 into 4 packed unsigned word integers in xmm1/m64 using unsigned saturation under writemask k1. + + + VPMOVQW + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 34 /r + + AVX512F + + Converts 8 packed quad-word integers from zmm2 into 8 packed word integers in xmm1/m128 with truncation under writemask k1. + + + VPMOVSQW + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 24 /r + + AVX512F + + Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed word integers in xmm1/m128 using signed saturation under writemask k1. + + + VPMOVUSQW + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 14 /r + + AVX512F + + Converts 8 packed unsigned quad-word integers from zmm2 into 8 packed unsigned word integers in xmm1/m128 using unsigned saturation under writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPMOVQD/VPMOVSQD/VPMOVUSQD--Down Convert QWord to DWord. + + VPMOVQD + xmm1/m128 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 35 /r + + AVX512VL + AVX512F + + Converts 2 packed quad-word integers from xmm2 into 2 packed double-word integers in xmm1/m128 with truncation subject to writemask k1. + + + VPMOVSQD + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 25 /r + + AVX512VL + AVX512F + + Converts 2 packed signed quad-word integers from xmm2 into 2 packed signed double-word integers in xmm1/m64 using signed saturation subject to writemask k1. + + + VPMOVUSQD + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 15 /r + + AVX512VL + AVX512F + + Converts 2 packed unsigned quad-word integers from xmm2 into 2 packed unsigned double-word integers in xmm1/m64 using unsigned saturation subject to writemask k1. + + + VPMOVQD + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 35 /r + + AVX512VL + AVX512F + + Converts 4 packed quad-word integers from ymm2 into 4 packed double-word integers in xmm1/m128 with truncation subject to writemask k1. + + + VPMOVSQD + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 25 /r + + AVX512VL + AVX512F + + Converts 4 packed signed quad-word integers from ymm2 into 4 packed signed double-word integers in xmm1/m128 using signed saturation subject to writemask k1. + + + VPMOVUSQD + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 15 /r + + AVX512VL + AVX512F + + Converts 4 packed unsigned quad-word integers from ymm2 into 4 packed unsigned double-word integers in xmm1/m128 using unsigned saturation subject to writemask k1. + + + VPMOVQD + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 35 /r + + AVX512F + + Converts 8 packed quad-word integers from zmm2 into 8 packed double-word integers in ymm1/m256 with truncation subject to writemask k1. + + + VPMOVSQD + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 25 /r + + AVX512F + + Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed double-word integers in ymm1/m256 using signed saturation subject to writemask k1. + + + VPMOVUSQD + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 15 /r + + AVX512F + + Converts 8 packed unsigned quad-word integers from zmm2 into 8 packed unsigned double-word integers in ymm1/m256 using unsigned saturation subject to writemask k1. 
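Editor's note: the VPMOVQD/VPMOVSQD/VPMOVUSQD triple above, like the other down-convert families around it, differs only in how out-of-range values are handled: plain truncation, signed saturation, or unsigned saturation. The corresponding intrinsics make the distinction concrete (assumes <immintrin.h> and AVX-512F; illustration only):

    #include <immintrin.h>

    /* VPMOVQD   ymm/m256, zmm -- keep the low 32 bits of each qword (truncate). */
    static inline __m256i narrow_trunc(__m512i q) { return _mm512_cvtepi64_epi32(q); }

    /* VPMOVSQD  ymm/m256, zmm -- clamp each signed qword to [INT32_MIN, INT32_MAX]. */
    static inline __m256i narrow_sat_s(__m512i q) { return _mm512_cvtsepi64_epi32(q); }

    /* VPMOVUSQD ymm/m256, zmm -- clamp each unsigned qword to [0, UINT32_MAX]. */
    static inline __m256i narrow_sat_u(__m512i q) { return _mm512_cvtusepi64_epi32(q); }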
+ + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPMOVDB/VPMOVSDB/VPMOVUSDB--Down Convert DWord to Byte. + + VPMOVDB + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 31 /r + + AVX512VL + AVX512F + + Converts 4 packed double-word integers from xmm2 into 4 packed byte integers in xmm1/m32 with truncation under writemask k1. + + + VPMOVSDB + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 21 /r + + AVX512VL + AVX512F + + Converts 4 packed signed double-word integers from xmm2 into 4 packed signed byte integers in xmm1/m32 using signed saturation under writemask k1. + + + VPMOVUSDB + xmm1/m32 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 11 /r + + AVX512VL + AVX512F + + Converts 4 packed unsigned double-word integers from xmm2 into 4 packed unsigned byte integers in xmm1/m32 using unsigned saturation under writemask k1. + + + VPMOVDB + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 31 /r + + AVX512VL + AVX512F + + Converts 8 packed double-word integers from ymm2 into 8 packed byte integers in xmm1/m64 with truncation under writemask k1. + + + VPMOVSDB + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 21 /r + + AVX512VL + AVX512F + + Converts 8 packed signed double-word integers from ymm2 into 8 packed signed byte integers in xmm1/m64 using signed saturation under writemask k1. + + + VPMOVUSDB + xmm1/m64 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 11 /r + + AVX512VL + AVX512F + + Converts 8 packed unsigned double-word integers from ymm2 into 8 packed unsigned byte integers in xmm1/m64 using unsigned saturation under writemask k1. + + + VPMOVDB + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 31 /r + + AVX512F + + Converts 16 packed double-word integers from zmm2 into 16 packed byte integers in xmm1/m128 with truncation under writemask k1. + + + VPMOVSDB + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 21 /r + + AVX512F + + Converts 16 packed signed double-word integers from zmm2 into 16 packed signed byte integers in xmm1/m128 using signed saturation under writemask k1. + + + VPMOVUSDB + xmm1/m128 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 11 /r + + AVX512F + + Converts 16 packed unsigned double-word integers from zmm2 into 16 packed unsigned byte integers in xmm1/m128 using unsigned saturation under writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPMOVDW/VPMOVSDW/VPMOVUSDW--Down Convert DWord to Word. + + VPMOVDW + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 33 /r + + AVX512VL + AVX512F + + Converts 4 packed double-word integers from xmm2 into 4 packed word integers in xmm1/m64 with truncation under writemask k1. + + + VPMOVSDW + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 23 /r + + AVX512VL + AVX512F + + Converts 4 packed signed double-word integers from xmm2 into 4 packed signed word integers in ymm1/m64 using signed saturation under writemask k1. + + + VPMOVUSDW + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 13 /r + + AVX512VL + AVX512F + + Converts 4 packed unsigned double-word integers from xmm2 into 4 packed unsigned word integers in xmm1/m64 using unsigned saturation under writemask k1. + + + VPMOVDW + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 33 /r + + AVX512VL + AVX512F + + Converts 8 packed double-word integers from ymm2 into 8 packed word integers in xmm1/m128 with truncation under writemask k1. + + + VPMOVSDW + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 23 /r + + AVX512VL + AVX512F + + Converts 8 packed signed double-word integers from ymm2 into 8 packed signed word integers in xmm1/m128 using signed saturation under writemask k1. 
+ + + VPMOVUSDW + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 13 /r + + AVX512VL + AVX512F + + Converts 8 packed unsigned double-word integers from ymm2 into 8 packed unsigned word integers in xmm1/m128 using unsigned saturation under writemask k1. + + + VPMOVDW + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 33 /r + + AVX512F + + Converts 16 packed double-word integers from zmm2 into 16 packed word integers in ymm1/m256 with truncation under writemask k1. + + + VPMOVSDW + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 23 /r + + AVX512F + + Converts 16 packed signed double-word integers from zmm2 into 16 packed signed word integers in ymm1/m256 using signed saturation under writemask k1. + + + VPMOVUSDW + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 13 /r + + AVX512F + + Converts 16 packed unsigned double-word integers from zmm2 into 16 packed unsigned word integers in ymm1/m256 using unsigned saturation under writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VPMOVWB/VPMOVSWB/VPMOVUSWB--Down Convert Word to Byte. + + VPMOVWB + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 30 /r + + AVX512VL + AVX512BW + + Converts 8 packed word integers from xmm2 into 8 packed bytes in xmm1/m64 with truncation under writemask k1. + + + VPMOVSWB + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 20 /r + + AVX512VL + AVX512BW + + Converts 8 packed signed word integers from xmm2 into 8 packed signed bytes in xmm1/m64 using signed saturation under writemask k1. + + + VPMOVUSWB + xmm1/m64 {k1}{z},xmm2 + EVEX.128.F3.0F38.W0 10 /r + + AVX512VL + AVX512BW + + Converts 8 packed unsigned word integers from xmm2 into 8 packed unsigned bytes in 8mm1/m64 using unsigned saturation under writemask k1. + + + VPMOVWB + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 30 /r + + AVX512VL + AVX512BW + + Converts 16 packed word integers from ymm2 into 16 packed bytes in xmm1/m128 with truncation under writemask k1. + + + VPMOVSWB + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 20 /r + + AVX512VL + AVX512BW + + Converts 16 packed signed word integers from ymm2 into 16 packed signed bytes in xmm1/m128 using signed saturation under writemask k1. + + + VPMOVUSWB + xmm1/m128 {k1}{z},ymm2 + EVEX.256.F3.0F38.W0 10 /r + + AVX512VL + AVX512BW + + Converts 16 packed unsigned word integers from ymm2 into 16 packed unsigned bytes in xmm1/m128 using unsigned saturation under writemask k1. + + + VPMOVWB + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 30 /r + + AVX512BW + + Converts 32 packed word integers from zmm2 into 32 packed bytes in ymm1/m256 with truncation under writemask k1. + + + VPMOVSWB + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 20 /r + + AVX512BW + + Converts 32 packed signed word integers from zmm2 into 32 packed signed bytes in ymm1/m256 using signed saturation under writemask k1. + + + VPMOVUSWB + ymm1/m256 {k1}{z},zmm2 + EVEX.512.F3.0F38.W0 10 /r + + AVX512BW + + Converts 32 packed unsigned word integers from zmm2 into 32 packed unsigned bytes in ymm1/m256 using unsigned saturation under writemask k1. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + PMOVSX--Packed Move with Sign Extend. + + PMOVSXBW + xmm1,xmm2/m64 + 66 0f 38 20 /r + + SSE4_1 + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + PMOVSXBD + xmm1,xmm2/m32 + 66 0f 38 21 /r + + SSE4_1 + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. 
+ + + PMOVSXBQ + xmm1,xmm2/m16 + 66 0f 38 22 /r + + SSE4_1 + + Sign extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + PMOVSXWD + xmm1,xmm2/m64 + 66 0f 38 23/r + + SSE4_1 + + Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. + + + PMOVSXWQ + xmm1,xmm2/m32 + 66 0f 38 24 /r + + SSE4_1 + + Sign extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + PMOVSXDQ + xmm1,xmm2/m64 + 66 0f 38 25 /r + + SSE4_1 + + Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXBW + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 20 /r + + AVX + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + VPMOVSXBD + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 21 /r + + AVX + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. + + + VPMOVSXBQ + xmm1,xmm2/m16 + VEX.128.66.0F38.WIG 22 /r + + AVX + + Sign extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXWD + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 23 /r + + AVX + + Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. + + + VPMOVSXWQ + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 24 /r + + AVX + + Sign extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXDQ + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 25 /r + + AVX + + Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXBW + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 20 /r + + AVX2 + + Sign extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1. + + + VPMOVSXBD + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 21 /r + + AVX2 + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1. + + + VPMOVSXBQ + ymm1,xmm2/m32 + VEX.256.66.0F38.WIG 22 /r + + AVX2 + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1. + + + VPMOVSXWD + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 23 /r + + AVX2 + + Sign extend 8 packed 16-bit integers in the low 16 bytes of xmm2/m128 to 8 packed 32-bit integers in ymm1. + + + VPMOVSXWQ + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 24 /r + + AVX2 + + Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1. + + + VPMOVSXDQ + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 25 /r + + AVX2 + + Sign extend 4 packed 32-bit integers in the low 16 bytes of xmm2/m128 to 4 packed 64-bit integers in ymm1. + + + VPMOVSXBW + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.WIG 20 /r + + AVX512VL + AVX512BW + + Sign extend 8 packed 8-bit integers in xmm2/m64 to 8 packed 16-bit integers in zmm1. + + + VPMOVSXBW + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.WIG 20 /r + + AVX512VL + AVX512BW + + Sign extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1. + + + VPMOVSXBW + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.WIG 20 /r + + AVX512BW + + Sign extend 32 packed 8-bit integers in ymm2/m256 to 32 packed 16-bit integers in zmm1. + + + VPMOVSXBD + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.WIG 21 /r + + AVX512VL + AVX512F + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1 subject to writemask k1. 
+ + + VPMOVSXBD + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.WIG 21 /r + + AVX512VL + AVX512F + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1 subject to writemask k1. + + + VPMOVSXBD + zmm1 {k1}{z},xmm2/m128 + EVEX.512.66.0F38.WIG 21 /r + + AVX512F + + Sign extend 16 packed 8-bit integers in the low 16 bytes of xmm2/m128 to 16 packed 32-bit integers in zmm1 subject to writemask k1. + + + VPMOVSXBQ + xmm1 {k1}{z},xmm2/m16 + EVEX.128.66.0F38.WIG 22 /r + + AVX512VL + AVX512F + + Sign extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1 subject to writemask k1. + + + VPMOVSXBQ + ymm1 {k1}{z},xmm2/m32 + EVEX.256.66.0F38.WIG 22 /r + + AVX512VL + AVX512F + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1 subject to writemask k1. + + + VPMOVSXBQ + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.WIG 22 /r + + AVX512F + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 64-bit integers in zmm1 subject to writemask k1. + + + VPMOVSXWD + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.WIG 23 /r + + AVX512VL + AVX512F + + Sign extend 4 packed 16-bit integers in the low 8 bytes of ymm2/mem to 4 packed 32-bit integers in xmm1 subject to writemask k1. + + + VPMOVSXWD + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.WIG 23 /r + + AVX512VL + AVX512F + + Sign extend 8 packed 16-bit integers in the low 16 bytes of ymm2/m128 to 8 packed 32-bit integers in ymm1 subject to writemask k1. + + + VPMOVSXWD + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.WIG 23 /r + + AVX512F + + Sign extend 16 packed 16-bit integers in the low 32 bytes of ymm2/m256 to 16 packed 32-bit integers in zmm1 subject to writemask k1. + + + VPMOVSXWQ + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.WIG 24 /r + + AVX512VL + AVX512F + + Sign extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1 subject to writemask k1. + + + VPMOVSXWQ + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.WIG 24 /r + + AVX512VL + AVX512F + + Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1 subject to writemask k1. + + + VPMOVSXWQ + zmm1 {k1}{z},xmm2/m128 + EVEX.512.66.0F38.WIG 24 /r + + AVX512F + + Sign extend 8 packed 16-bit integers in the low 16 bytes of xmm2/m128 to 8 packed 64-bit integers in zmm1 subject to writemask k1. + + + VPMOVSXDQ + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.W0 25 /r + + AVX512VL + AVX512F + + Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in zmm1 using writemask k1. + + + VPMOVSXDQ + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.W0 25 /r + + AVX512VL + AVX512F + + Sign extend 4 packed 32-bit integers in the low 16 bytes of xmm2/m128 to 4 packed 64-bit integers in zmm1 using writemask k1. + + + VPMOVSXDQ + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.W0 25 /r + + AVX512F + + Sign extend 8 packed 32-bit integers in the low 32 bytes of ymm2/m256 to 8 packed 64-bit integers in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PMOVZX--Packed Move with Zero Extend. + + PMOVZXBW + xmm1,xmm2/m64 + 66 0f 38 30 /r + + SSE4_1 + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. 
+ + + PMOVZXBD + xmm1,xmm2/m32 + 66 0f 38 31 /r + + SSE4_1 + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. + + + PMOVZXBQ + xmm1,xmm2/m16 + 66 0f 38 32 /r + + SSE4_1 + + Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + PMOVZXWD + xmm1,xmm2/m64 + 66 0f 38 33 /r + + SSE4_1 + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. + + + PMOVZXWQ + xmm1,xmm2/m32 + 66 0f 38 34 /r + + SSE4_1 + + Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + PMOVZXDQ + xmm1,xmm2/m64 + 66 0f 38 35 /r + + SSE4_1 + + Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXBW + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 30 /r + + AVX + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + VPMOVZXBD + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 31 /r + + AVX + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. + + + VPMOVZXBQ + xmm1,xmm2/m16 + VEX.128.66.0F38.WIG 32 /r + + AVX + + Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXWD + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 33 /r + + AVX + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. + + + VPMOVZXWQ + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 34 /r + + AVX + + Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXDQ + xmm1,xmm2/m64 + VEX.128.66.0F 38.WIG 35 /r + + AVX + + Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXBW + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 30 /r + + AVX2 + + Zero extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1. + + + VPMOVZXBD + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 31 /r + + AVX2 + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1. + + + VPMOVZXBQ + ymm1,xmm2/m32 + VEX.256.66.0F38.WIG 32 /r + + AVX2 + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1. + + + VPMOVZXWD + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 33 /r + + AVX2 + + Zero extend 8 packed 16-bit integers xmm2/m128 to 8 packed 32-bit integers in ymm1. + + + VPMOVZXWQ + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 34 /r + + AVX2 + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in xmm1. + + + VPMOVZXDQ + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 35 /r + + AVX2 + + Zero extend 4 packed 32-bit integers in xmm2/m128 to 4 packed 64-bit integers in ymm1. + + + VPMOVZXBW + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38 30.WIG /r + + AVX512VL + AVX512BW + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + VPMOVZXBW + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.WIG 30 /r + + AVX512VL + AVX512BW + + Zero extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1. + + + VPMOVZXBW + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.WIG 30 /r + + AVX512BW + + Zero extend 32 packed 8-bit integers in ymm2/m256 to 32 packed 16-bit integers in zmm1. 
+ + + VPMOVZXBD + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.WIG 31 /r + + AVX512VL + AVX512F + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1 subject to writemask k1. + + + VPMOVZXBD + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.WIG 31 /r + + AVX512VL + AVX512F + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1 subject to writemask k1. + + + VPMOVZXBD + zmm1 {k1}{z},xmm2/m128 + EVEX.512.66.0F38.WIG 31 /r + + AVX512F + + Zero extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 32-bit integers in zmm1 subject to writemask k1. + + + VPMOVZXBQ + xmm1 {k1}{z},xmm2/m16 + EVEX.128.66.0F38.WIG 32 /r + + AVX512VL + AVX512F + + Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1 subject to writemask k1. + + + VPMOVZXBQ + ymm1 {k1}{z},xmm2/m32 + EVEX.256.66.0F38.WIG 32 /r + + AVX512VL + AVX512F + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1 subject to writemask k1. + + + VPMOVZXBQ + zmm1 {k1}{z},xmm2/m64 + EVEX.512.66.0F38.WIG 32 /r + + AVX512F + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 64-bit integers in zmm1 subject to writemask k1. + + + VPMOVZXWD + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.WIG 33 /r + + AVX512VL + AVX512F + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1 subject to writemask k1. + + + VPMOVZXWD + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.WIG 33 /r + + AVX512VL + AVX512F + + Zero extend 8 packed 16-bit integers in xmm2/m128 to 8 packed 32-bit integers in zmm1 subject to writemask k1. + + + VPMOVZXWD + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.WIG 33 /r + + AVX512F + + Zero extend 16 packed 16-bit integers in ymm2/m256 to 16 packed 32-bit integers in zmm1 subject to writemask k1. + + + VPMOVZXWQ + xmm1 {k1}{z},xmm2/m32 + EVEX.128.66.0F38.WIG 34 /r + + AVX512VL + AVX512F + + Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1 subject to writemask k1. + + + VPMOVZXWQ + ymm1 {k1}{z},xmm2/m64 + EVEX.256.66.0F38.WIG 34 /r + + AVX512VL + AVX512F + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1 subject to writemask k1. + + + VPMOVZXWQ + zmm1 {k1}{z},xmm2/m128 + EVEX.512.66.0F38.WIG 34 /r + + AVX512F + + Zero extend 8 packed 16-bit integers in xmm2/m128 to 8 packed 64-bit integers in zmm1 subject to writemask k1. + + + VPMOVZXDQ + xmm1 {k1}{z},xmm2/m64 + EVEX.128.66.0F38.W0 35 /r + + AVX512VL + AVX512F + + Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in zmm1 using writemask k1. + + + VPMOVZXDQ + ymm1 {k1}{z},xmm2/m128 + EVEX.256.66.0F38.W0 35 /r + + AVX512VL + AVX512F + + Zero extend 4 packed 32-bit integers in xmm2/m128 to 4 packed 64-bit integers in zmm1 using writemask k1. + + + VPMOVZXDQ + zmm1 {k1}{z},ymm2/m256 + EVEX.512.66.0F38.W0 35 /r + + AVX512F + + Zero extend 8 packed 32-bit integers in ymm2/m256 to 8 packed 64-bit integers in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PMULDQ--Multiply Packed Doubleword Integers. 
+ + PMULDQ + xmm1,xmm2/m128 + 66 0F 38 28 /r + + SSE4_1 + + Multiply packed signed doubleword integers in xmm1 by packed signed doubleword integers in xmm2/m128, and store the quadword results in xmm1. + + + VPMULDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 28 /r + + AVX + + Multiply packed signed doubleword integers in xmm2 by packed signed doubleword integers in xmm3/m128, and store the quadword results in xmm1. + + + VPMULDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 28 /r + + AVX2 + + Multiply packed signed doubleword integers in ymm2 by packed signed doubleword integers in ymm3/m256, and store the quadword results in ymm1. + + + VPMULDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 28 /r + + AVX512VL + AVX512F + + Multiply packed signed doubleword integers in xmm2 by packed signed doubleword integers in xmm3/m128/m64bcst, and store the quadword results in xmm1 using writemask k1. + + + VPMULDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 28 /r + + AVX512VL + AVX512F + + Multiply packed signed doubleword integers in ymm2 by packed signed doubleword integers in ymm3/m256/m64bcst, and store the quadword results in ymm1 using writemask k1. + + + VPMULDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 28 /r + + AVX512F + + Multiply packed signed doubleword integers in zmm2 by packed signed doubleword integers in zmm3/m512/m64bcst, and store the quadword results in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULHRSW--Multiply Packed Unsigned Integers with Round and Scale. + + PMULHRSW + xmm1,xmm2/m128 + 66 0F 38 0B /r + + SSSE3 + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1. + + + VPMULHRSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 0B /r + + AVX + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1. + + + VPMULHRSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 0B /r + + AVX2 + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to ymm1. + + + VPMULHRSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 0B /r + + AVX512VL + AVX512BW + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1 under writemask k1. + + + VPMULHRSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 0B /r + + AVX512VL + AVX512BW + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to ymm1 under writemask k1. + + + VPMULHRSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 0B /r + + AVX512BW + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULHUW--Multiply Packed Unsigned Integers and Store High Result. + + PMULHUW + xmm1,xmm2/m128 + 66 0F E4 /r + + SSE2 + + Multiply the packed unsigned word integers in xmm1 and xmm2/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHUW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E4 /r + + AVX + + Multiply the packed unsigned word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1. 
+ + + VPMULHUW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E4 /r + + AVX2 + + Multiply the packed unsigned word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1. + + + VPMULHUW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E4 /r + + AVX512VL + AVX512BW + + Multiply the packed unsigned word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1 under writemask k1. + + + VPMULHUW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E4 /r + + AVX512VL + AVX512BW + + Multiply the packed unsigned word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1 under writemask k1. + + + VPMULHUW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E4 /r + + AVX512BW + + Multiply the packed unsigned word integers in zmm2 and zmm3/m512, and store the high 16 bits of the results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULHW--Multiply Packed Integers and Store High Result. + + PMULHW + xmm1,xmm2/m128 + 66 0F E5 /r + + SSE2 + + Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E5 /r + + AVX + + Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E5 /r + + AVX2 + + Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1. + + + VPMULHW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E5 /r + + AVX512VL + AVX512BW + + Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1 under writemask k1. + + + VPMULHW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E5 /r + + AVX512VL + AVX512BW + + Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1 under writemask k1. + + + VPMULHW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E5 /r + + AVX512BW + + Multiply the packed signed word integers in zmm2 and zmm3/m512, and store the high 16 bits of the results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULLD/PMULLQ--Multiply Packed Integers and Store Low Result. + + PMULLD + xmm1,xmm2/m128 + 66 0F 38 40 /r + + SSE4_1 + + Multiply the packed dword signed integers in xmm1 and xmm2/m128 and store the low 32 bits of each product in xmm1. + + + VPMULLD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 40 /r + + AVX + + Multiply the packed dword signed integers in xmm2 and xmm3/m128 and store the low 32 bits of each product in xmm1. + + + VPMULLD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 40 /r + + AVX2 + + Multiply the packed dword signed integers in ymm2 and ymm3/m256 and store the low 32 bits of each product in ymm1. + + + VPMULLD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 40 /r + + AVX512VL + AVX512F + + Multiply the packed dword signed integers in xmm2 and xmm3/m128/m32bcst and store the low 32 bits of each product in xmm1 under writemask k1. 
+ + + VPMULLD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 40 /r + + AVX512VL + AVX512F + + Multiply the packed dword signed integers in ymm2 and ymm3/m256/m32bcst and store the low 32 bits of each product in ymm1 under writemask k1. + + + VPMULLD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 40 /r + + AVX512F + + Multiply the packed dword signed integers in zmm2 and zmm3/m512/m32bcst and store the low 32 bits of each product in zmm1 under writemask k1. + + + VPMULLQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 40 /r + + AVX512VL + AVX512DQ + + Multiply the packed qword signed integers in xmm2 and xmm3/m128/m64bcst and store the low 64 bits of each product in xmm1 under writemask k1. + + + VPMULLQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 40 /r + + AVX512VL + AVX512DQ + + Multiply the packed qword signed integers in ymm2 and ymm3/m256/m64bcst and store the low 64 bits of each product in ymm1 under writemask k1. + + + VPMULLQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 40 /r + + AVX512DQ + + Multiply the packed qword signed integers in zmm2 and zmm3/m512/m64bcst and store the low 64 bits of each product in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULLW--Multiply Packed Integers and Store Low Result. + + PMULLW + xmm1,xmm2/m128 + 66 0F D5 /r + + SSE2 + + Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of the results in xmm1. + + + VPMULLW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F D5 /r + + AVX + + Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the low 16 bits of the results in xmm1. + + + VPMULLW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F D5 /r + + AVX2 + + Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the low 16 bits of the results in ymm1. + + + VPMULLW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG D5 /r + + AVX512VL + AVX512BW + + Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the low 16 bits of the results in xmm1 under writemask k1. + + + VPMULLW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG D5 /r + + AVX512VL + AVX512BW + + Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the low 16 bits of the results in ymm1 under writemask k1. + + + VPMULLW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG D5 /r + + AVX512BW + + Multiply the packed signed word integers in zmm2 and zmm3/m512, and store the low 16 bits of the results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPMULTISHIFTQB--Select Packed Unaligned Bytes from Quadword Sources. + + VPMULTISHIFTQB + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 83 /r + + AVX512VBMI + AVX512VL + + Select unaligned bytes from qwords in xmm3/m128/m64bcst using control bytes in xmm2, write byte results to xmm1 under k1. + + + VPMULTISHIFTQB + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 83 /r + + AVX512VBMI + AVX512VL + + Select unaligned bytes from qwords in ymm3/m256/m64bcst using control bytes in ymm2, write byte results to ymm1 under k1. 
+ + + VPMULTISHIFTQB + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 83 /r + + AVX512VBMI + + Select unaligned bytes from qwords in zmm3/m512/m64bcst using control bytes in zmm2, write byte results to zmm1 under k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULUDQ--Multiply Packed Unsigned Doubleword Integers. + + PMULUDQ + xmm1,xmm2/m128 + 66 0F F4 /r + + SSE4_1 + + Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers in xmm2/m128, and store the quadword results in xmm1. + + + VPMULUDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F4 /r + + AVX + + Multiply packed unsigned doubleword integers in xmm2 by packed unsigned doubleword integers in xmm3/m128, and store the quadword results in xmm1. + + + VPMULUDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F4 /r + + AVX2 + + Multiply packed unsigned doubleword integers in ymm2 by packed unsigned doubleword integers in ymm3/m256, and store the quadword results in ymm1. + + + VPMULUDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 F4 /r + + AVX512VL + AVX512F + + Multiply packed unsigned doubleword integers in xmm2 by packed unsigned doubleword integers in xmm3/m128/m64bcst, and store the quadword results in xmm1 under writemask k1. + + + VPMULUDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 F4 /r + + AVX512VL + AVX512F + + Multiply packed unsigned doubleword integers in ymm2 by packed unsigned doubleword integers in ymm3/m256/m64bcst, and store the quadword results in ymm1 under writemask k1. + + + VPMULUDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 F4 /r + + AVX512F + + Multiply packed unsigned doubleword integers in zmm2 by packed unsigned doubleword integers in zmm3/m512/m64bcst, and store the quadword results in zmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + POR--Bitwise Logical Or. + + POR + xmm1,xmm2/m128 + 66 0F EB /r + + SSE2 + + Bitwise OR of xmm2/m128 and xmm1. + + + VPOR + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EB /r + + AVX + + Bitwise OR of xmm2/m128 and xmm3. + + + VPOR + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EB /r + + AVX2 + + Bitwise OR of ymm2/m256 and ymm3. + + + VPORD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 EB /r + + AVX512VL + AVX512F + + Bitwise OR of packed doubleword integers in xmm2 and xmm3/m128/m32bcst using writemask k1. + + + VPORD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 EB /r + + AVX512VL + AVX512F + + Bitwise OR of packed doubleword integers in ymm2 and ymm3/m256/m32bcst using writemask k1. + + + VPORD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 EB /r + + AVX512F + + Bitwise OR of packed doubleword integers in zmm2 and zmm3/m512/m32bcst using writemask k1. + + + VPORQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 EB /r + + AVX512VL + AVX512F + + Bitwise OR of packed quadword integers in xmm2 and xmm3/m128/m64bcst using writemask k1. + + + VPORQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 EB /r + + AVX512VL + AVX512F + + Bitwise OR of packed quadword integers in ymm2 and ymm3/m256/m64bcst using writemask k1. + + + VPORQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 EB /r + + AVX512F + + Bitwise OR of packed quadword integers in zmm2 and zmm3/m512/m64bcst using writemask k1. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PROLD/PROLVD/PROLQ/PROLVQ--Bit Rotate Left. + + VPROLVD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 15 /r + + AVX512VL + AVX512F + + Rotate doublewords in xmm2 left by count in the corresponding element of xmm3/m128/m32bcst. Result written to xmm1 under writemask k1. + + + VPROLD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.NDD.128.66.0F.W0 72 /1 ib + + AVX512VL + AVX512F + + Rotate doublewords in xmm2/m128/m32bcst left by imm8. Result written to xmm1 using writemask k1. + + + VPROLVQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 15 /r + + AVX512VL + AVX512F + + Rotate quadwords in xmm2 left by count in the corresponding element of xmm3/m128/m64bcst. Result written to xmm1 under writemask k1. + + + VPROLQ + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.NDD.128.66.0F.W1 72 /1 ib + + AVX512VL + AVX512F + + Rotate quadwords in xmm2/m128/m64bcst left by imm8. Result written to xmm1 using writemask k1. + + + VPROLVD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 15 /r + + AVX512VL + AVX512F + + Rotate doublewords in ymm2 left by count in the corresponding element of ymm3/m256/m32bcst. Result written to ymm1 under writemask k1. + + + VPROLD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.NDD.256.66.0F.W0 72 /1 ib + + AVX512VL + AVX512F + + Rotate doublewords in ymm2/m256/m32bcst left by imm8. Result written to ymm1 using writemask k1. + + + VPROLVQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 15 /r + + AVX512VL + AVX512F + + Rotate quadwords in ymm2 left by count in the corresponding element of ymm3/m256/m64bcst. Result written to ymm1 under writemask k1. + + + VPROLQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.NDD.256.66.0F.W1 72 /1 ib + + AVX512VL + AVX512F + + Rotate quadwords in ymm2/m256/m64bcst left by imm8. Result written to ymm1 using writemask k1. + + + VPROLVD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 15 /r + + AVX512F + + Rotate left of doublewords in zmm2 by count in the corresponding element of zmm3/m512/m32bcst. Result written to zmm1 using writemask k1. + + + VPROLD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.NDD.512.66.0F.W0 72 /1 ib + + AVX512F + + Rotate left of doublewords in zmm3/m512/m32bcst by imm8. Result written to zmm1 using writemask k1. + + + VPROLVQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 15 /r + + AVX512F + + Rotate quadwords in zmm2 left by count in the corresponding element of zmm3/m512/m64bcst. Result written to zmm1under writemask k1. + + + VPROLQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.NDD.512.66.0F.W1 72 /1 ib + + AVX512F + + Rotate quadwords in zmm2/m512/m64bcst left by imm8. Result written to zmm1 using writemask k1. + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PRORD/PRORVD/PRORQ/PRORVQ--Bit Rotate Right. + + VPRORVD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 14 /r + + AVX512VL + AVX512F + + Rotate doublewords in xmm2 right by count in the corresponding element of xmm3/m128/m32bcst, store result using writemask k1. + + + VPRORD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.NDD.128.66.0F.W0 72 /0 ib + + AVX512VL + AVX512F + + Rotate doublewords in xmm2/m128/m32bcst right by imm8, store result using writemask k1. 
+ + + VPRORVQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 14 /r + + AVX512VL + AVX512F + + Rotate quadwords in xmm2 right by count in the corresponding element of xmm3/m128/m64bcst, store result using writemask k1. + + + VPRORQ + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.NDD.128.66.0F.W1 72 /0 ib + + AVX512VL + AVX512F + + Rotate quadwords in xmm2/m128/m64bcst right by imm8, store result using writemask k1. + + + VPRORVD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 14 /r + + AVX512VL + AVX512F + + Rotate doublewords in ymm2 right by count in the corresponding element of ymm3/m256/m32bcst, store using result writemask k1. + + + VPRORD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.NDD.256.66.0F.W0 72 /0 ib + + AVX512VL + AVX512F + + Rotate doublewords in ymm2/m256/m32bcst right by imm8, store result using writemask k1. + + + VPRORVQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 14 /r + + AVX512VL + AVX512F + + Rotate quadwords in ymm2 right by count in the corresponding element of ymm3/m256/m64bcst, store result using writemask k1. + + + VPRORQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.NDD.256.66.0F.W1 72 /0 ib + + AVX512VL + AVX512F + + Rotate quadwords in ymm2/m256/m64bcst right by imm8, store result using writemask k1. + + + VPRORVD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 14 /r + + AVX512F + + Rotate doublewords in zmm2 right by count in the corresponding element of zmm3/m512/m32bcst, store result using writemask k1. + + + VPRORD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.NDD.512.66.0F.W0 72 /0 ib + + AVX512F + + Rotate doublewords in zmm2/m512/m32bcst right by imm8, store result using writemask k1. + + + VPRORVQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 14 /r + + AVX512F + + Rotate quadwords in zmm2 right by count in the corresponding element of zmm3/m512/m64bcst, store result using writemask k1. + + + VPRORQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.NDD.512.66.0F.W1 72 /0 ib + + AVX512F + + Rotate quadwords in zmm2/m512/m64bcst right by imm8, store result using writemask k1. + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPSCATTERDD/VPSCATTERDQ/VPSCATTERQD/VPSCATTERQQ--Scatter Packed Dword, Packed Qword with Signed Dword, Signed Qword Indices. + + VPSCATTERDD + vm32x {k1},xmm1 + EVEX.128.66.0F38.W0 A0 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter dword values to memory using writemask k1. + + + VPSCATTERDD + vm32y {k1},ymm1 + EVEX.256.66.0F38.W0 A0 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter dword values to memory using writemask k1. + + + VPSCATTERDD + vm32z {k1},zmm1 + EVEX.512.66.0F38.W0 A0 /vsib + + AVX512F + + Using signed dword indices, scatter dword values to memory using writemask k1. + + + VPSCATTERDQ + vm32x {k1},xmm1 + EVEX.128.66.0F38.W1 A0 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter qword values to memory using writemask k1. + + + VPSCATTERDQ + vm32x {k1},ymm1 + EVEX.256.66.0F38.W1 A0 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter qword values to memory using writemask k1. + + + VPSCATTERDQ + vm32y {k1},zmm1 + EVEX.512.66.0F38.W1 A0 /vsib + + AVX512F + + Using signed dword indices, scatter qword values to memory using writemask k1. + + + VPSCATTERQD + vm64x {k1},xmm1 + EVEX.128.66.0F38.W0 A1 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter dword values to memory using writemask k1. 
+ + + VPSCATTERQD + vm64y {k1},xmm1 + EVEX.256.66.0F38.W0 A1 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter dword values to memory using writemask k1. + + + VPSCATTERQD + vm64z {k1},ymm1 + EVEX.512.66.0F38.W0 A1 /vsib + + AVX512F + + Using signed qword indices, scatter dword values to memory using writemask k1. + + + VPSCATTERQQ + vm64x {k1},xmm1 + EVEX.128.66.0F38.W1 A1 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter qword values to memory using writemask k1. + + + VPSCATTERQQ + vm64y {k1},ymm1 + EVEX.256.66.0F38.W1 A1 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter qword values to memory using writemask k1. + + + VPSCATTERQQ + vm64z {k1},zmm1 + EVEX.512.66.0F38.W1 A1 /vsib + + AVX512F + + Using signed qword indices, scatter qword values to memory using writemask k1. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + ModRM:reg(r) + NA + NA + + + + PSHUFB--Packed Shuffle Bytes. + + PSHUFB + xmm1,xmm2/m128 + 66 0F 38 00 /r + + SSSE3 + + Shuffle bytes in xmm1 according to contents of xmm2/m128. + + + VPSHUFB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38 00 /r + + AVX + + Shuffle bytes in xmm2 according to contents of xmm3/m128. + + + VPSHUFB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38 00 /r + + AVX2 + + Shuffle bytes in ymm2 according to contents of ymm3/m256. + + + VPSHUFB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.WIG 00 /r + + AVX512VL + AVX512BW + + Shuffle bytes in xmm2 according to contents of xmm3/m128 under write mask k1. + + + VPSHUFB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.WIG 00 /r + + AVX512VL + AVX512BW + + Shuffle bytes in ymm2 according to contents of ymm3/m256 under write mask k1. + + + VPSHUFB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.WIG 00 /r + + AVX512BW + + Shuffle bytes in zmm2 according to contents of zmm3/m512 under write mask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSHUFHW--Shuffle Packed High Words. + + PSHUFHW + xmm1,xmm2/m128,imm8 + F3 0F 70 /r ib + + SSE2 + + Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFHW + xmm1,xmm2/m128,imm8 + VEX.128.F3.0F 70 /r ib + + AVX + + Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFHW + ymm1,ymm2/m256,imm8 + VEX.256.F3.0F 70 /r ib + + AVX2 + + Shuffle the high words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1. + + + VPSHUFHW + xmm1 {k1}{z},xmm2/m128,imm8 + EVEX.128.F3.0F.WIG 70 /r ib + + AVX512VL + AVX512BW + + Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1 under write mask k1. + + + VPSHUFHW + ymm1 {k1}{z},ymm2/m256,imm8 + EVEX.256.F3.0F.WIG 70 /r ib + + AVX512VL + AVX512BW + + Shuffle the high words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1 under write mask k1. + + + VPSHUFHW + zmm1 {k1}{z},zmm2/m512,imm8 + EVEX.512.F3.0F.WIG 70 /r ib + + AVX512BW + + Shuffle the high words in zmm2/m512 based on the encoding in imm8 and store the result in zmm1 under write mask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + PSHUFLW--Shuffle Packed Low Words. + + PSHUFLW + xmm1,xmm2/m128,imm8 + F2 0F 70 /r ib + + SSE2 + + Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. 
+ + + VPSHUFLW + xmm1,xmm2/m128,imm8 + VEX.128.F2.0F 70 /r ib + + AVX + + Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFLW + ymm1,ymm2/m256,imm8 + VEX.256.F2.0F 70 /r ib + + AVX2 + + Shuffle the low words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1. + + + VPSHUFLW + xmm1 {k1}{z},xmm2/m128,imm8 + EVEX.128.F2.0F.WIG 70 /r ib + + AVX512VL + AVX512BW + + Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1 under write mask k1. + + + VPSHUFLW + ymm1 {k1}{z},ymm2/m256,imm8 + EVEX.256.F2.0F.WIG 70 /r ib + + AVX512VL + AVX512BW + + Shuffle the low words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1 under write mask k1. + + + VPSHUFLW + zmm1 {k1}{z},zmm2/m512,imm8 + EVEX.512.F2.0F.WIG 70 /r ib + + AVX512BW + + Shuffle the low words in zmm2/m512 based on the encoding in imm8 and store the result in zmm1 under write mask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + PSHUFD--Shuffle Packed Doublewords. + + PSHUFD + xmm1,xmm2/m128,imm8 + 66 0F 70 /r ib + + SSE2 + + Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFD + xmm1,xmm2/m128,imm8 + VEX.128.66.0F.WIG 70 /r ib + + AVX + + Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFD + ymm1,ymm2/m256,imm8 + VEX.256.66.0F.WIG 70 /r ib + + AVX2 + + Shuffle the doublewords in ymm2/m256 based on the encoding in imm8 and store the result in ymm1. + + + VPSHUFD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F.W0 70 /r ib + + AVX512VL + AVX512F + + Shuffle the doublewords in xmm2/m128/m32bcst based on the encoding in imm8 and store the result in xmm1 using writemask k1. + + + VPSHUFD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F.W0 70 /r ib + + AVX512VL + AVX512F + + Shuffle the doublewords in ymm2/m256/m32bcst based on the encoding in imm8 and store the result in ymm1 using writemask k1. + + + VPSHUFD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.512.66.0F.W0 70 /r ib + + AVX512F + + Shuffle the doublewords in zmm2/m512/m32bcst based on the encoding in imm8 and store the result in zmm1 using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + PSLLDQ--Byte Shift Left. + + PSLLDQ + xmm1,imm8 + 66 0F 73 /7 ib + + SSE2 + + Shift xmm1 left by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSLLDQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 73 /7 ib + + AVX + + Shift xmm2 left by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSLLDQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 73 /7 ib + + AVX2 + + Shift ymm2 left by imm8 bytes while shifting in 0s and store result in ymm1. + + + VPSLLDQ + xmm1,xmm2/ m128,imm8 + EVEX.NDD.128.66.0F.WIG 73 /7 ib + + AVX512VL + AVX512BW + + Shift xmm2/m128 left by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSLLDQ + ymm1,ymm2/m256,imm8 + EVEX.NDD.256.66.0F.WIG 73 /7 ib + + AVX512VL + AVX512BW + + Shift ymm2/m256 left by imm8 bytes while shifting in 0s and store result in ymm1. + + + VPSLLDQ + zmm1,zmm2/m512,imm8 + EVEX.NDD.512.66.0F.WIG 73 /7 ib + + AVX512BW + + Shift zmm2/m512 left by imm8 bytes while shifting in 0s and store result in zmm1. 
+ + + ModRM:r/m(r,w) + Imm8 + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + + PSLLW/PSLLD/PSLLQ--Bit Shift Left. + + PSLLW + xmm1,xmm2/m128 + 66 0F F1/r + + SSE2 + + Shift words in xmm1 left by amount specified in xmm2/m128 while shifting in 0s. + + + PSLLW + xmm1,imm8 + 66 0F 71 /6 ib + + SSE2 + + Shift words in xmm1 left by imm8 while shifting in 0s. + + + PSLLD + xmm1,imm8 + 66 0F 72 /6 ib + + SSE2 + + Shift doublewords in xmm1 left by imm8 while shifting in 0s. + + + PSLLQ + xmm1,imm8 + 66 0F 73 /6 ib + + SSE2 + + Shift quadwords in xmm1 left by imm8 while shifting in 0s. + + + VPSLLW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F1 /r + + AVX + + Shift words in xmm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLW + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 71 /6 ib + + AVX + + Shift words in xmm2 left by imm8 while shifting in 0s. + + + VPSLLD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F2 /r + + AVX + + Shift doublewords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLD + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 72 /6 ib + + AVX + + Shift doublewords in xmm2 left by imm8 while shifting in 0s. + + + VPSLLQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F3 /r + + AVX + + Shift quadwords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 73 /6 ib + + AVX + + Shift quadwords in xmm2 left by imm8 while shifting in 0s. + + + VPSLLW + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG F1 /r + + AVX2 + + Shift words in ymm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLW + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 71 /6 ib + + AVX2 + + Shift words in ymm2 left by imm8 while shifting in 0s. + + + VPSLLD + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG F2 /r + + AVX2 + + Shift doublewords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLD + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 72 /6 ib + + AVX2 + + Shift doublewords in ymm2 left by imm8 while shifting in 0s. + + + VPSLLQ + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG F3 /r + + AVX2 + + Shift quadwords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 73 /6 ib + + AVX2 + + Shift quadwords in ymm2 left by imm8 while shifting in 0s. + + + VPSLLW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG F1 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLW + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.WIG F1 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLW + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.WIG F1 /r + + AVX512BW + + Shift words in zmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLW + xmm1 {k1}{z},xmm2/m128,imm8 + EVEX.NDD.128.66.0F.WIG 71 /6 ib + + AVX512VL + AVX512BW + + Shift words in xmm2/m128 left by imm8 while shifting in 0s using writemask k1. + + + VPSLLW + ymm1 {k1}{z},ymm2/m256,imm8 + EVEX.NDD.256.66.0F.WIG 71 /6 ib + + AVX512VL + AVX512BW + + Shift words in ymm2/m256 left by imm8 while shifting in 0s using writemask k1. + + + VPSLLW + zmm1 {k1}{z},zmm2/m512,imm8 + EVEX.NDD.512.66.0F.WIG 71 /6 ib + + AVX512BW + + Shift words in zmm2/m512 left by imm8 while shifting in 0 using writemask k1. 
+ + + VPSLLD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W0 F2 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s under writemask k1. + + + VPSLLD + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W0 F2 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s under writemask k1. + + + VPSLLD + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W0 F2 /r + + AVX512F + + Shift doublewords in zmm2 left by amount specified in xmm3/m128 while shifting in 0s under writemask k1. + + + VPSLLD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.NDD.128.66.0F.W0 72 /6 ib + + AVX512VL + AVX512F + + Shift doublewords in xmm2/m128/m32bcst left by imm8 while shifting in 0s using writemask k1. + + + VPSLLD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.NDD.256.66.0F.W0 72 /6 ib + + AVX512VL + AVX512F + + Shift doublewords in ymm2/m256/m32bcst left by imm8 while shifting in 0s using writemask k1. + + + VPSLLD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.NDD.512.66.0F.W0 72 /6 ib + + AVX512F + + Shift doublewords in zmm2/m512/m32bcst left by imm8 while shifting in 0s using writemask k1. + + + VPSLLQ + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W1 F3 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLQ + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W1 F3 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLQ + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W1 F3 /r + + AVX512F + + Shift quadwords in zmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLQ + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.NDD.128.66.0F.W1 73 /6 ib + + AVX512VL + AVX512F + + Shift quadwords in xmm2/m128/m64bcst left by imm8 while shifting in 0s using writemask k1. + + + VPSLLQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.NDD.256.66.0F.W1 73 /6 ib + + AVX512VL + AVX512F + + Shift quadwords in ymm2/m256/m64bcst left by imm8 while shifting in 0s using writemask k1. + + + VPSLLQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.NDD.512.66.0F.W1 73 /6 ib + + AVX512F + + Shift quadwords in zmm2/m512/m64bcst left by imm8 while shifting in 0s using writemask k1. + + + ModRM:r/m(r,w) + Imm8 + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSRAW/PSRAD/PSRAQ--Bit Shift Arithmetic Right. + + PSRAW + xmm1,xmm2/m128 + 66 0F E1/r + + SSE2 + + Shift words in xmm1 right by amount specified in xmm2/m128 while shifting in sign bits. + + + PSRAW + xmm1,imm8 + 66 0F 71 /4 ib + + SSE2 + + Shift words in xmm1 right by imm8 while shifting in sign bits. + + + PSRAD + xmm1,xmm2/m128 + 66 0F E2 /r + + SSE2 + + Shift doublewords in xmm1 right by amount specified in xmm2/m128 while shifting in sign bits. + + + PSRAD + xmm1,imm8 + 66 0F 72 /4 ib + + SSE2 + + Shift doublewords in xmm1 right by imm8 while shifting in sign bits. + + + VPSRAW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E1 /r + + AVX + + Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits. 
+ + + VPSRAW + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 71 /4 ib + + AVX + + Shift words in xmm2 right by imm8 while shifting in sign bits. + + + VPSRAD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E2 /r + + AVX + + Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits. + + + VPSRAD + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 72 /4 ib + + AVX + + Shift doublewords in xmm2 right by imm8 while shifting in sign bits. + + + VPSRAW + ymm1,ymm2,ymm3/m128 + VEX.NDS.256.66.0F.WIG E1 /r + + AVX2 + + Shift words in ymm2 right by amount specified in ymm3/m128 while shifting in sign bits. + + + VPSRAW + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 71 /4 ib + + AVX2 + + Shift words in ymm2 right by imm8 while shifting in sign bits. + + + VPSRAD + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG E2 /r + + AVX2 + + Shift doublewords in ymm2 right by amount specified in ymm3/m128 while shifting in sign bits. + + + VPSRAD + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 72 /4 ib + + AVX2 + + Shift doublewords in ymm2 right by imm8 while shifting in sign bits. + + + VPSRAW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E1 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAW + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.WIG E1 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAW + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.WIG E1 /r + + AVX512BW + + Shift words in zmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAW + xmm1 {k1}{z},xmm2/m128,imm8 + EVEX.NDD.128.66.0F.WIG 71 /4 ib + + AVX512VL + AVX512BW + + Shift words in xmm2/m128 right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAW + ymm1 {k1}{z},ymm2/m256,imm8 + EVEX.NDD.256.66.0F.WIG 71 /4 ib + + AVX512VL + AVX512BW + + Shift words in ymm2/m256 right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAW + zmm1 {k1}{z},zmm2/m512,imm8 + EVEX.NDD.512.66.0F.WIG 71 /4 ib + + AVX512BW + + Shift words in zmm2/m512 right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W0 E2 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAD + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W0 E2 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAD + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W0 E2 /r + + AVX512F + + Shift doublewords in zmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.NDD.128.66.0F.W0 72 /4 ib + + AVX512VL + AVX512F + + Shift doublewords in xmm2/m128/m32bcst right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.NDD.256.66.0F.W0 72 /4 ib + + AVX512VL + AVX512F + + Shift doublewords in ymm2/m256/m32bcst right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.NDD.512.66.0F.W0 72 /4 ib + + AVX512F + + Shift doublewords in zmm2/m512/m32bcst right by imm8 while shifting in sign bits using writemask k1. 
+ + + VPSRAQ + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W1 E2 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAQ + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W1 E2 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAQ + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W1 E2 /r + + AVX512F + + Shift quadwords in zmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAQ + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.NDD.128.66.0F.W1 72 /4 ib + + AVX512VL + AVX512F + + Shift quadwords in xmm2/m128/m64bcst right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.NDD.256.66.0F.W1 72 /4 ib + + AVX512VL + AVX512F + + Shift quadwords in ymm2/m256/m64bcst right by imm8 while shifting in sign bits using writemask k1. + + + VPSRAQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.NDD.512.66.0F.W1 72 /4 ib + + AVX512F + + Shift quadwords in zmm2/m512/m64bcst right by imm8 while shifting in sign bits using writemask k1. + + + ModRM:r/m(r,w) + Imm8 + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSRLDQ--Byte Shift Right. + + PSRLDQ + xmm1,imm8 + 66 0F 73 /3 ib + + SSE2 + + Shift xmm1 right by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSRLDQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F 73 /3 ib + + AVX + + Shift xmm2 right by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSRLDQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F 73 /3 ib + + AVX2 + + Shift ymm2 right by imm8 bytes while shifting in 0s and store result in ymm1. + + + VPSRLDQ + xmm1,xmm2/m128,imm8 + EVEX.NDD.128.66.0F.WIG 73 /3 ib + + AVX512VL + AVX512BW + + Shift xmm2/m128 right by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSRLDQ + ymm1,ymm2/m256,imm8 + EVEX.NDD.256.66.0F.WIG 73 /3 ib + + AVX512VL + AVX512BW + + Shift ymm2/m256 right by imm8 bytes while shifting in 0s and store result in ymm1. + + + VPSRLDQ + zmm1,zmm2/m512,imm8 + EVEX.NDD.512.66.0F.WIG 73 /3 ib + + AVX512BW + + Shift zmm2/m512 right by imm8 bytes while shifting in 0s and store result in zmm1. + + + ModRM:r/m(r,w) + Imm8 + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + + PSRLW/PSRLD/PSRLQ--Shift Packed Data Right Logical. + + PSRLW + xmm1,xmm2/m128 + 66 0F D1 /r + + SSE2 + + Shift words in xmm1 right by amount specified in xmm2/m128 while shifting in 0s. + + + PSRLW + xmm1,imm8 + 66 0F 71 /2 ib + + SSE2 + + Shift words in xmm1 right by imm8 while shifting in 0s. + + + PSRLD + xmm1,xmm2/m128 + 66 0F D2 /r + + SSE2 + + Shift doublewords in xmm1 right by amount specified in xmm2/m128 while shifting in 0s. + + + PSRLD + xmm1,imm8 + 66 0F 72 /2 ib + + SSE2 + + Shift doublewords in xmm1 right by imm8 while shifting in 0s. + + + PSRLQ + xmm1,xmm2/m128 + 66 0F D3 /r + + SSE2 + + Shift quadwords in xmm1 right by amount specified in xmm2/m128 while shifting in 0s. + + + PSRLQ + xmm1,imm8 + 66 0F 73 /2 ib + + SSE2 + + Shift quadwords in xmm1 right by imm8 while shifting in 0s. 
+ + + VPSRLW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D1 /r + + AVX + + Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLW + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 71 /2 ib + + AVX + + Shift words in xmm2 right by imm8 while shifting in 0s. + + + VPSRLD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D2 /r + + AVX + + Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLD + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 72 /2 ib + + AVX + + Shift doublewords in xmm2 right by imm8 while shifting in 0s. + + + VPSRLQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D3 /r + + AVX + + Shift quadwords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 73 /2 ib + + AVX + + Shift quadwords in xmm2 right by imm8 while shifting in 0s. + + + VPSRLW + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG D1 /r + + AVX2 + + Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLW + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 71 /2 ib + + AVX2 + + Shift words in ymm2 right by imm8 while shifting in 0s. + + + VPSRLD + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG D2 /r + + AVX2 + + Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLD + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 72 /2 ib + + AVX2 + + Shift doublewords in ymm2 right by imm8 while shifting in 0s. + + + VPSRLQ + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG D3 /r + + AVX2 + + Shift quadwords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 73 /2 ib + + AVX2 + + Shift quadwords in ymm2 right by imm8 while shifting in 0s. + + + VPSRLW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG D1 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLW + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.WIG D1 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLW + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.WIG D1 /r + + AVX512BW + + Shift words in zmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLW + xmm1 {k1}{z},xmm2/m128,imm8 + EVEX.NDD.128.66.0F.WIG 71 /2 ib + + AVX512VL + AVX512BW + + Shift words in xmm2/m128 right by imm8 while shifting in 0s using writemask k1. + + + VPSRLW + ymm1 {k1}{z},ymm2/m256,imm8 + EVEX.NDD.256.66.0F.WIG 71 /2 ib + + AVX512VL + AVX512BW + + Shift words in ymm2/m256 right by imm8 while shifting in 0s using writemask k1. + + + VPSRLW + zmm1 {k1}{z},zmm2/m512,imm8 + EVEX.NDD.512.66.0F.WIG 71 /2 ib + + AVX512BW + + Shift words in zmm2/m512 right by imm8 while shifting in 0s using writemask k1. + + + VPSRLD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W0 D2 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLD + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W0 D2 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLD + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W0 D2 /r + + AVX512F + + Shift doublewords in zmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. 
+ + + VPSRLD + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.NDD.128.66.0F.W0 72 /2 ib + + AVX512VL + AVX512F + + Shift doublewords in xmm2/m128/m32bcst right by imm8 while shifting in 0s using writemask k1. + + + VPSRLD + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.NDD.256.66.0F.W0 72 /2 ib + + AVX512VL + AVX512F + + Shift doublewords in ymm2/m256/m32bcst right by imm8 while shifting in 0s using writemask k1. + + + VPSRLD + zmm1 {k1}{z},zmm2/m512/m32bcst,imm8 + EVEX.NDD.512.66.0F.W0 72 /2 ib + + AVX512F + + Shift doublewords in zmm2/m512/m32bcst right by imm8 while shifting in 0s using writemask k1. + + + VPSRLQ + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.W1 D3 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLQ + ymm1 {k1}{z},ymm2,xmm3/m128 + EVEX.NDS.256.66.0F.W1 D3 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLQ + zmm1 {k1}{z},zmm2,xmm3/m128 + EVEX.NDS.512.66.0F.W1 D3 /r + + AVX512F + + Shift quadwords in zmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLQ + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.NDD.128.66.0F.W1 73 /2 ib + + AVX512VL + AVX512F + + Shift quadwords in xmm2/m128/m64bcst right by imm8 while shifting in 0s using writemask k1. + + + VPSRLQ + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.NDD.256.66.0F.W1 73 /2 ib + + AVX512VL + AVX512F + + Shift quadwords in ymm2/m256/m64bcst right by imm8 while shifting in 0s using writemask k1. + + + VPSRLQ + zmm1 {k1}{z},zmm2/m512/m64bcst,imm8 + EVEX.NDD.512.66.0F.W1 73 /2 ib + + AVX512F + + Shift quadwords in zmm2/m512/m64bcst right by imm8 while shifting in 0s using writemask k1. + + + ModRM:r/m(r,w) + Imm8 + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + EVEX.vvvv(w) + ModRM:r/m(R) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPSLLVW/VPSLLVD/VPSLLVQ--Variable Bit Shift Left Logical. + + VPSLLVD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 47 /r + + AVX2 + + Shift doublewords in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSLLVQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 47 /r + + AVX2 + + Shift quadwords in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSLLVD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 47 /r + + AVX2 + + Shift doublewords in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + VPSLLVQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 47 /r + + AVX2 + + Shift quadwords in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + VPSLLVW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 12 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s using writemask k1. + + + VPSLLVW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 12 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s using writemask k1. 
+ + + VPSLLVW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 12 /r + + AVX512BW + + Shift words in zmm2 left by amount specified in the corresponding element of zmm3/m512 while shifting in 0s using writemask k1. + + + VPSLLVD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 47 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 left by amount specified in the corresponding element of xmm3/m128/m32bcst while shifting in 0s using writemask k1. + + + VPSLLVD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 47 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 left by amount specified in the corresponding element of ymm3/m256/m32bcst while shifting in 0s using writemask k1. + + + VPSLLVD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 47 /r + + AVX512F + + Shift doublewords in zmm2 left by amount specified in the corresponding element of zmm3/m512/m32bcst while shifting in 0s using writemask k1. + + + VPSLLVQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 47 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 left by amount specified in the corresponding element of xmm3/m128/m64bcst while shifting in 0s using writemask k1. + + + VPSLLVQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 47 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 left by amount specified in the corresponding element of ymm3/m256/m64bcst while shifting in 0s using writemask k1. + + + VPSLLVQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 47 /r + + AVX512F + + Shift quadwords in zmm2 left by amount specified in the corresponding element of zmm3/m512/m64bcst while shifting in 0s using writemask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPSRLVW/VPSRLVD/VPSRLVQ--Variable Bit Shift Right Logical. + + VPSRLVD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 45 /r + + AVX2 + + Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSRLVQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 45 /r + + AVX2 + + Shift quadwords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSRLVD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 45 /r + + AVX2 + + Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + VPSRLVQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 45 /r + + AVX2 + + Shift quadwords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + VPSRLVW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 10 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s using writemask k1. + + + VPSRLVW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 10 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s using writemask k1. + + + VPSRLVW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 10 /r + + AVX512BW + + Shift words in zmm2 right by amount specified in the corresponding element of zmm3/m512 while shifting in 0s using writemask k1. 
+ + + VPSRLVD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 45 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m32bcst while shifting in 0s using writemask k1. + + + VPSRLVD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 45 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m32bcst while shifting in 0s using writemask k1. + + + VPSRLVD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 45 /r + + AVX512F + + Shift doublewords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m32bcst while shifting in 0s using writemask k1. + + + VPSRLVQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 45 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m64bcst while shifting in 0s using writemask k1. + + + VPSRLVQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 45 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m64bcst while shifting in 0s using writemask k1. + + + VPSRLVQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 45 /r + + AVX512F + + Shift quadwords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m64bcst while shifting in 0s using writemask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSUBB/PSUBW/PSUBD/PSUBQ--Packed Integer Subtract. + + PSUBB + xmm1,xmm2/m128 + 66 0F F8 /r + + SSE2 + + Subtract packed byte integers in xmm2/m128 from xmm1. + + + PSUBW + xmm1,xmm2/m128 + 66 0F F9 /r + + SSE2 + + Subtract packed word integers in xmm2/m128 from xmm1. + + + PSUBD + xmm1,xmm2/m128 + 66 0F FA /r + + SSE2 + + Subtract packed doubleword integers in xmm2/m128 from xmm1. + + + PSUBQ + xmm1,xmm2/m128 + 66 0F FB/r + + SSE2 + + Subtract packed quadword integers in xmm2/m128 from xmm1. + + + VPSUBB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F8 /r + + AVX + + Subtract packed byte integers in xmm3/m128 from xmm2. + + + VPSUBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F9 /r + + AVX + + Subtract packed word integers in xmm3/m128 from xmm2. + + + VPSUBD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FA /r + + AVX + + Subtract packed doubleword integers in xmm3/m128 from xmm2. + + + VPSUBQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FB/r + + AVX + + Subtract packed quadword integers in xmm3/m128 from xmm2. + + + VPSUBB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F8 /r + + AVX2 + + Subtract packed byte integers in ymm3/m256 from ymm2. + + + VPSUBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F9 /r + + AVX2 + + Subtract packed word integers in ymm3/m256 from ymm2. + + + VPSUBD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FA /r + + AVX2 + + Subtract packed doubleword integers in ymm3/m256 from ymm2. + + + VPSUBQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FB/r + + AVX2 + + Subtract packed quadword integers in ymm3/m256 from ymm2. + + + VPSUBB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG F8 /r + + AVX512VL + AVX512BW + + Subtract packed byte integers in xmm3/m128 from xmm2 and store in xmm1 using writemask k1. 
+ + + VPSUBB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG F8 /r + + AVX512VL + AVX512BW + + Subtract packed byte integers in ymm3/m256 from ymm2 and store in ymm1 using writemask k1. + + + VPSUBB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG F8 /r + + AVX512BW + + Subtract packed byte integers in zmm3/m512 from zmm2 and store in zmm1 using writemask k1. + + + VPSUBW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG F9 /r + + AVX512VL + AVX512BW + + Subtract packed word integers in xmm3/m128 from xmm2 and store in xmm1 using writemask k1. + + + VPSUBW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG F9 /r + + AVX512VL + AVX512BW + + Subtract packed word integers in ymm3/m256 from ymm2 and store in ymm1 using writemask k1. + + + VPSUBW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG F9 /r + + AVX512BW + + Subtract packed word integers in zmm3/m512 from zmm2 and store in zmm1 using writemask k1. + + + VPSUBD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 FA /r + + AVX512VL + AVX512F + + Subtract packed doubleword integers in xmm3/m128/m32bcst from xmm2 and store in xmm1 using writemask k1. + + + VPSUBD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 FA /r + + AVX512VL + AVX512F + + Subtract packed doubleword integers in ymm3/m256/m32bcst from ymm2 and store in ymm1 using writemask k1. + + + VPSUBD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 FA /r + + AVX512F + + Subtract packed doubleword integers in zmm3/m512/m32bcst from zmm2 and store in zmm1 using writemask k1. + + + VPSUBQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 FB /r + + AVX512VL + AVX512F + + Subtract packed quadword integers in xmm3/m128/m64bcst from xmm2 and store in xmm1 using writemask k1. + + + VPSUBQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 FB /r + + AVX512VL + AVX512F + + Subtract packed quadword integers in ymm3/m256/m64bcst from ymm2 and store in ymm1 using writemask k1. + + + VPSUBQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 FB/r + + AVX512F + + Subtract packed quadword integers in zmm3/m512/m64bcst from zmm2 and store in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSUBSB/PSUBSW--Subtract Packed Signed Integers with Signed Saturation. + + PSUBSB + xmm1,xmm2/m128 + 66 0F E8 /r + + SSE2 + + Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1 and saturate results. + + + PSUBSW + xmm1,xmm2/m128 + 66 0F E9 /r + + SSE2 + + Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1 and saturate results. + + + VPSUBSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E8 /r + + AVX + + Subtract packed signed byte integers in xmm3/m128 from packed signed byte integers in xmm2 and saturate results. + + + VPSUBSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F E9 /r + + AVX + + Subtract packed signed word integers in xmm3/m128 from packed signed word integers in xmm2 and saturate results. + + + VPSUBSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E8 /r + + AVX2 + + Subtract packed signed byte integers in ymm3/m256 from packed signed byte integers in ymm2 and saturate results. 
+ + + VPSUBSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F E9 /r + + AVX2 + + Subtract packed signed word integers in ymm3/m256 from packed signed word integers in ymm2 and saturate results. + + + VPSUBSB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E8 /r + + AVX512VL + AVX512BW + + Subtract packed signed byte integers in xmm3/m128 from packed signed byte integers in xmm2 and saturate results and store in xmm1 using writemask k1. + + + VPSUBSB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E8 /r + + AVX512VL + AVX512BW + + Subtract packed signed byte integers in ymm3/m256 from packed signed byte integers in ymm2 and saturate results and store in ymm1 using writemask k1. + + + VPSUBSB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E8 /r + + AVX512BW + + Subtract packed signed byte integers in zmm3/m512 from packed signed byte integers in zmm2 and saturate results and store in zmm1 using writemask k1. + + + VPSUBSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG E9 /r + + AVX512VL + AVX512BW + + Subtract packed signed word integers in xmm3/m128 from packed signed word integers in xmm2 and saturate results and store in xmm1 using writemask k1. + + + VPSUBSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG E9 /r + + AVX512VL + AVX512BW + + Subtract packed signed word integers in ymm3/m256 from packed signed word integers in ymm2 and saturate results and store in ymm1 using writemask k1. + + + VPSUBSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG E9 /r + + AVX512BW + + Subtract packed signed word integers in zmm3/m512 from packed signed word integers in zmm2 and saturate results and store in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSUBUSB/PSUBUSW--Subtract Packed Unsigned Integers with Unsigned Saturation. + + PSUBUSB + xmm1,xmm2/m128 + 66 0F D8 /r + + SSE2 + + Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1 and saturate result. + + + PSUBUSW + xmm1,xmm2/m128 + 66 0F D9 /r + + SSE2 + + Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1 and saturate result. + + + VPSUBUSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F D8 /r + + AVX + + Subtract packed unsigned byte integers in xmm3/m128 from packed unsigned byte integers in xmm2 and saturate result. + + + VPSUBUSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F D9 /r + + AVX + + Subtract packed unsigned word integers in xmm3/m128 from packed unsigned word integers in xmm2 and saturate result. + + + VPSUBUSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F D8 /r + + AVX2 + + Subtract packed unsigned byte integers in ymm3/m256 from packed unsigned byte integers in ymm2 and saturate result. + + + VPSUBUSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F D9 /r + + AVX2 + + Subtract packed unsigned word integers in ymm3/m256 from packed unsigned word integers in ymm2 and saturate result. + + + VPSUBUSB + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG D8 /r + + AVX512VL + AVX512BW + + Subtract packed unsigned byte integers in xmm3/m128 from packed unsigned byte integers in xmm2, saturate results and store in xmm1 using writemask k1. + + + VPSUBUSB + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG D8 /r + + AVX512VL + AVX512BW + + Subtract packed unsigned byte integers in ymm3/m256 from packed unsigned byte integers in ymm2, saturate results and store in ymm1 using writemask k1. 
+ + + VPSUBUSB + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG D8 /r + + AVX512BW + + Subtract packed unsigned byte integers in zmm3/m512 from packed unsigned byte integers in zmm2, saturate results and store in zmm1 using writemask k1. + + + VPSUBUSW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG D9 /r + + AVX512VL + AVX512BW + + Subtract packed unsigned word integers in xmm3/m128 from packed unsigned word integers in xmm2 and saturate results and store in xmm1 using writemask k1. + + + VPSUBUSW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG D9 /r + + AVX512VL + AVX512BW + + Subtract packed unsigned word integers in ymm3/m256 from packed unsigned word integers in ymm2, saturate results and store in ymm1 using writemask k1. + + + VPSUBUSW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG D9 /r + + AVX512BW + + Subtract packed unsigned word integers in zmm3/m512 from packed unsigned word integers in zmm2, saturate results and store in zmm1 using writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPTESTNMB/W/D/Q--Logical NAND and Set. + + VPTESTNMB + k2 {k1},xmm2,xmm3/m128 + EVEX.NDS.128.F3.0F38.W0 26 /r + + AVX512VL + AVX512BW + + Bitwise NAND of packed byte integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMB + k2 {k1},ymm2,ymm3/m256 + EVEX.NDS.256.F3.0F38.W0 26 /r + + AVX512VL + AVX512BW + + Bitwise NAND of packed byte integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMB + k2 {k1},zmm2,zmm3/m512 + EVEX.NDS.512.F3.0F38.W0 26 /r + + AVX512F + AVX512BW + + Bitwise NAND of packed byte integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMW + k2 {k1},xmm2,xmm3/m128 + EVEX.NDS.128.F3.0F38.W1 26 /r + + AVX512VL + AVX512BW + + Bitwise NAND of packed word integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMW + k2 {k1},ymm2,ymm3/m256 + EVEX.NDS.256.F3.0F38.W1 26 /r + + AVX512VL + AVX512BW + + Bitwise NAND of packed word integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMW + k2 {k1},zmm2,zmm3/m512 + EVEX.NDS.512.F3.0F38.W1 26 /r + + AVX512F + AVX512BW + + Bitwise NAND of packed word integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMD + k2 {k1},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.F3.0F38.W0 27 /r + + AVX512VL + AVX512F + + Bitwise NAND of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMD + k2 {k1},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.F3.0F38.W0 27 /r + + AVX512VL + AVX512F + + Bitwise NAND of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. 
+ + + VPTESTNMD + k2 {k1},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.F3.0F38.W0 27 /r + + AVX512F + + Bitwise NAND of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMQ + k2 {k1},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.F3.0F38.W1 27 /r + + AVX512VL + AVX512F + + Bitwise NAND of packed quadword integers in xmm2 and xmm3/m128/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMQ + k2 {k1},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.F3.0F38.W1 27 /r + + AVX512VL + AVX512F + + Bitwise NAND of packed quadword integers in ymm2 and ymm3/m256/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTNMQ + k2 {k1},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.F3.0F38.W1 27 /r + + AVX512F + + Bitwise NAND of packed quadword integers in zmm2 and zmm3/m512/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PUNPCKHBW/PUNPCKHWD/PUNPCKHDQ/PUNPCKHQDQ--Unpack High Data. + + PUNPCKHBW + xmm1,xmm2/m128 + 66 0F 68 /r + + SSE2 + + Interleave high-order bytes from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKHWD + xmm1,xmm2/m128 + 66 0F 69 /r + + SSE2 + + Interleave high-order words from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKHDQ + xmm1,xmm2/m128 + 66 0F 6A /r + + SSE2 + + Interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKHQDQ + xmm1,xmm2/m128 + 66 0F 6D /r + + SSE2 + + Interleave high-order quadword from xmm1 and xmm2/m128 into xmm1 register. + + + VPUNPCKHBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 68 /r + + AVX + + Interleave high-order bytes from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKHWD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 69 /r + + AVX + + Interleave high-order words from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKHDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 6A /r + + AVX + + Interleave high-order doublewords from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKHQDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 6D /r + + AVX + + Interleave high-order quadword from xmm2 and xmm3/m128 into xmm1 register. + + + VPUNPCKHBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 68 /r + + AVX2 + + Interleave high-order bytes from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHWD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 69 /r + + AVX2 + + Interleave high-order words from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 6A /r + + AVX2 + + Interleave high-order doublewords from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHQDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 6D /r + + AVX2 + + Interleave high-order quadword from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHBW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 68 /r + + AVX512VL + AVX512BW + + Interleave high-order bytes from xmm2 and xmm3/m128 into xmm1 register using k1 write mask. + + + VPUNPCKHWD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 69 /r + + AVX512VL + AVX512BW + + Interleave high-order words from xmm2 and xmm3/m128 into xmm1 register using k1 write mask. 
+ + + VPUNPCKHDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 6A /r + + AVX512VL + AVX512F + + Interleave high-order doublewords from xmm2 and xmm3/m128/m32bcst into xmm1 register using k1 write mask. + + + VPUNPCKHQDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 6D /r + + AVX512VL + AVX512F + + Interleave high-order quadword from xmm2 and xmm3/m128/m64bcst into xmm1 register using k1 write mask. + + + VPUNPCKHBW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 68 /r + + AVX512VL + AVX512BW + + Interleave high-order bytes from ymm2 and ymm3/m256 into ymm1 register using k1 write mask. + + + VPUNPCKHWD + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 69 /r + + AVX512VL + AVX512BW + + Interleave high-order words from ymm2 and ymm3/m256 into ymm1 register using k1 write mask. + + + VPUNPCKHDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 6A /r + + AVX512VL + AVX512F + + Interleave high-order doublewords from ymm2 and ymm3/m256/m32bcst into ymm1 register using k1 write mask. + + + VPUNPCKHQDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 6D /r + + AVX512VL + AVX512F + + Interleave high-order quadword from ymm2 and ymm3/m256/m64bcst into ymm1 register using k1 write mask. + + + VPUNPCKHBW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 68/r + + AVX512BW + + Interleave high-order bytes from zmm2 and zmm3/m512 into zmm1 register. + + + VPUNPCKHWD + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 69/r + + AVX512BW + + Interleave high-order words from zmm2 and zmm3/m512 into zmm1 register. + + + VPUNPCKHDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 6A /r + + AVX512F + + Interleave high-order doublewords from zmm2 and zmm3/m512/m32bcst into zmm1 register using k1 write mask. + + + VPUNPCKHQDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 6D /r + + AVX512F + + Interleave high-order quadword from zmm2 and zmm3/m512/m64bcst into zmm1 register using k1 write mask. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PUNPCKLBW/PUNPCKLWD/PUNPCKLDQ/PUNPCKLQDQ--Unpack Low Data. + + PUNPCKLBW + xmm1,xmm2/m128 + 66 0F 60 /r + + SSE2 + + Interleave low-order bytes from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKLWD + xmm1,xmm2/m128 + 66 0F 61 /r + + SSE2 + + Interleave low-order words from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKLDQ + xmm1,xmm2/m128 + 66 0F 62 /r + + SSE2 + + Interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKLQDQ + xmm1,xmm2/m128 + 66 0F 6C /r + + SSE2 + + Interleave low-order quadword from xmm1 and xmm2/m128 into xmm1 register. + + + VPUNPCKLBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 60 /r + + AVX + + Interleave low-order bytes from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKLWD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 61 /r + + AVX + + Interleave low-order words from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKLDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 62 /r + + AVX + + Interleave low-order doublewords from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKLQDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 6C /r + + AVX + + Interleave low-order quadword from xmm2 and xmm3/m128 into xmm1 register. + + + VPUNPCKLBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 60 /r + + AVX2 + + Interleave low-order bytes from ymm2 and ymm3/m256 into ymm1 register. 
+ + + VPUNPCKLWD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 61 /r + + AVX2 + + Interleave low-order words from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKLDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 62 /r + + AVX2 + + Interleave low-order doublewords from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKLQDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 6C /r + + AVX2 + + Interleave low-order quadword from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKLBW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 60 /r + + AVX512VL + AVX512BW + + Interleave low-order bytes from xmm2 and xmm3/m128 into xmm1 register subject to write mask k1. + + + VPUNPCKLWD + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F.WIG 61 /r + + AVX512VL + AVX512BW + + Interleave low-order words from xmm2 and xmm3/m128 into xmm1 register subject to write mask k1. + + + VPUNPCKLDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 62 /r + + AVX512VL + AVX512F + + Interleave low-order doublewords from xmm2 and xmm3/m128/m32bcst into xmm1 register subject to write mask k1. + + + VPUNPCKLQDQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 6C /r + + AVX512VL + AVX512F + + Interleave low-order quadword from zmm2 and zmm3/m512/m64bcst into zmm1 register subject to write mask k1. + + + VPUNPCKLBW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 60 /r + + AVX512VL + AVX512BW + + Interleave low-order bytes from ymm2 and ymm3/m256 into ymm1 register subject to write mask k1. + + + VPUNPCKLWD + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F.WIG 61 /r + + AVX512VL + AVX512BW + + Interleave low-order words from ymm2 and ymm3/m256 into ymm1 register subject to write mask k1. + + + VPUNPCKLDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 62 /r + + AVX512VL + AVX512F + + Interleave low-order doublewords from ymm2 and ymm3/m256/m32bcst into ymm1 register subject to write mask k1. + + + VPUNPCKLQDQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 6C /r + + AVX512VL + AVX512F + + Interleave low-order quadword from ymm2 and ymm3/m256/m64bcst into ymm1 register subject to write mask k1. + + + VPUNPCKLBW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 60/r + + AVX512BW + + Interleave low-order bytes from zmm2 and zmm3/m512 into zmm1 register subject to write mask k1. + + + VPUNPCKLWD + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F.WIG 61/r + + AVX512BW + + Interleave low-order words from zmm2 and zmm3/m512 into zmm1 register subject to write mask k1. + + + VPUNPCKLDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 62 /r + + AVX512F + + Interleave low-order doublewords from zmm2 and zmm3/m512/m32bcst into zmm1 register subject to write mask k1. + + + VPUNPCKLQDQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 6C /r + + AVX512F + + Interleave low-order quadword from zmm2 and zmm3/m512/m64bcst into zmm1 register subject to write mask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2--Shuffle Packed Values at 128-bit Granularity. + + VSHUFF32X4 + ymm1{k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 23 /r ib + + AVX512VL + AVX512F + + Shuffle 128-bit packed single-precision floating-point values selected by imm8 from ymm2 and ymm3/m256/m32bcst and place results in ymm1 subject to writemask k1. 
+ + + VSHUFF32x4 + zmm1{k1}{z},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.66.0F3A.W0 23 /r ib + + AVX512F + + Shuffle 128-bit packed single-precision floating-point values selected by imm8 from zmm2 and zmm3/m512/m32bcst and place results in zmm1 subject to writemask k1. + + + VSHUFF64X2 + ymm1{k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 23 /r ib + + AVX512VL + AVX512F + + Shuffle 128-bit packed double-precision floating-point values selected by imm8 from ymm2 and ymm3/m256/m64bcst and place results in ymm1 subject to writemask k1. + + + VSHUFF64x2 + zmm1{k1}{z},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F3A.W1 23 /r ib + + AVX512F + + Shuffle 128-bit packed double-precision floating-point values selected by imm8 from zmm2 and zmm3/m512/m64bcst and place results in zmm1 subject to writemask k1. + + + VSHUFI32X4 + ymm1{k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 43 /r ib + + AVX512VL + AVX512F + + Shuffle 128-bit packed double-word values selected by imm8 from ymm2 and ymm3/m256/m32bcst and place results in ymm1 subject to writemask k1. + + + VSHUFI32x4 + zmm1{k1}{z},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.66.0F3A.W0 43 /r ib + + AVX512F + + Shuffle 128-bit packed double-word values selected by imm8 from zmm2 and zmm3/m512/m32bcst and place results in zmm1 subject to writemask k1. + + + VSHUFI64X2 + ymm1{k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 43 /r ib + + AVX512VL + AVX512F + + Shuffle 128-bit packed quad-word values selected by imm8 from ymm2 and ymm3/m256/m64bcst and place results in ymm1 subject to writemask k1. + + + VSHUFI64x2 + zmm1{k1}{z},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F3A.W1 43 /r ib + + AVX512F + + Shuffle 128-bit packed quad-word values selected by imm8 from zmm2 and zmm3/m512/m64bcst and place results in zmm1 subject to writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SHUFPD--Packed Interleave Shuffle of Pairs of Double-Precision Floating-Point Values. + + SHUFPD + xmm1,xmm2/m128,imm8 + 66 0F C6 /r ib + + SSE2 + + Shuffle two pairs of double-precision floating-point values from xmm1 and xmm2/m128 using imm8 to select from each pair, interleaved result is stored in xmm1. + + + VSHUFPD + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F.WIG C6 /r ib + + AVX + + Shuffle two pairs of double-precision floating-point values from xmm2 and xmm3/m128 using imm8 to select from each pair, interleaved result is stored in xmm1. + + + VSHUFPD + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F.WIG C6 /r ib + + AVX + + Shuffle four pairs of double-precision floating-point values from ymm2 and ymm3/m256 using imm8 to select from each pair, interleaved result is stored in xmm1. + + + VSHUFPD + xmm1{k1}{z},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F.W1 C6 /r ib + + AVX512VL + AVX512F + + Shuffle two paris of double-precision floating-point values from xmm2 and xmm3/m128/m64bcst using imm8 to select from each pair. store interleaved results in xmm1 subject to writemask k1. + + + VSHUFPD + ymm1{k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F.W1 C6 /r ib + + AVX512VL + AVX512F + + Shuffle four paris of double-precision floating-point values from ymm2 and ymm3/m256/m64bcst using imm8 to select from each pair. store interleaved results in ymm1 subject to writemask k1. 
+ + + VSHUFPD + zmm1{k1}{z},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.NDS.512.66.0F.W1 C6 /r ib + + AVX512F + + Shuffle eight paris of double-precision floating-point values from zmm2 and zmm3/m512/m64bcst using imm8 to select from each pair. store interleaved results in zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + SHUFPS--Packed Interleave Shuffle of Quadruplets of Single-Precision Floating-Point Values. + + SHUFPS + xmm1,xmm3/m128,imm8 + 0F C6 /r ib + + SSE + + Select from quadruplet of single-precision floatingpoint values in xmm1 and xmm2/m128 using imm8, interleaved result pairs are stored in xmm1. + + + VSHUFPS + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.0F.WIG C6 /r ib + + AVX + + Select from quadruplet of single-precision floatingpoint values in xmm1 and xmm2/m128 using imm8, interleaved result pairs are stored in xmm1. + + + VSHUFPS + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.0F.WIG C6 /r ib + + AVX + + Select from quadruplet of single-precision floatingpoint values in ymm2 and ymm3/m256 using imm8, interleaved result pairs are stored in ymm1. + + + VSHUFPS + xmm1{k1}{z},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.0F.W0 C6 /r ib + + AVX512VL + AVX512F + + Select from quadruplet of single-precision floatingpoint values in xmm1 and xmm2/m128 using imm8, interleaved result pairs are stored in xmm1, subject to writemask k1. + + + VSHUFPS + ymm1{k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.0F.W0 C6 /r ib + + AVX512VL + AVX512F + + Select from quadruplet of single-precision floatingpoint values in ymm2 and ymm3/m256 using imm8, interleaved result pairs are stored in ymm1, subject to writemask k1. + + + VSHUFPS + zmm1{k1}{z},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.NDS.512.0F.W0 C6 /r ib + + AVX512F + + Select from quadruplet of single-precision floatingpoint values in zmm2 and zmm3/m512 using imm8, interleaved result pairs are stored in zmm1, subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + SQRTPD--Square Root of Double-Precision Floating-Point Values. + + SQRTPD + xmm1,xmm2/m128 + 66 0F 51 /r + + SSE2 + + Computes Square Roots of the packed double-precision floating-point values in xmm2/m128 and stores the result in xmm1. + + + VSQRTPD + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 51 /r + + AVX + + Computes Square Roots of the packed double-precision floating-point values in xmm2/m128 and stores the result in xmm1. + + + VSQRTPD + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 51 /r + + AVX + + Computes Square Roots of the packed double-precision floating-point values in ymm2/m256 and stores the result in ymm1. + + + VSQRTPD + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F.W1 51 /r + + AVX512VL + AVX512F + + Computes Square Roots of the packed double-precision floating-point values in xmm2/m128/m64bcst and stores the result in xmm1 subject to writemask k1. + + + VSQRTPD + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F.W1 51 /r + + AVX512VL + AVX512F + + Computes Square Roots of the packed double-precision floating-point values in ymm2/m256/m64bcst and stores the result in ymm1 subject to writemask k1. 
+ + + VSQRTPD + zmm1 {k1}{z},zmm2/m512/m64bcst{er} + EVEX.512.66.0F.W1 51 /r + + AVX512F + + Computes Square Roots of the packed double-precision floating-point values in zmm2/m512/m64bcst and stores the result in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + SQRTPS--Square Root of Single-Precision Floating-Point Values. + + SQRTPS + xmm1,xmm2/m128 + 0F 51 /r + + SSE + + Computes Square Roots of the packed single-precision floating-point values in xmm2/m128 and stores the result in xmm1. + + + VSQRTPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 51 /r + + AVX + + Computes Square Roots of the packed single-precision floating-point values in xmm2/m128 and stores the result in xmm1. + + + VSQRTPS + ymm1,ymm2/m256 + VEX.256.0F.WIG 51/r + + AVX + + Computes Square Roots of the packed single-precision floating-point values in ymm2/m256 and stores the result in ymm1. + + + VSQRTPS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.0F.W0 51 /r + + AVX512VL + AVX512F + + Computes Square Roots of the packed single-precision floating-point values in xmm2/m128/m32bcst and stores the result in xmm1 subject to writemask k1. + + + VSQRTPS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.0F.W0 51 /r + + AVX512VL + AVX512F + + Computes Square Roots of the packed single-precision floating-point values in ymm2/m256/m32bcst and stores the result in ymm1 subject to writemask k1. + + + VSQRTPS + zmm1 {k1}{z},zmm2/m512/m32bcst{er} + EVEX.512.0F.W0 51/r + + AVX512F + + Computes Square Roots of the packed single-precision floating-point values in zmm2/m512/m32bcst and stores the result in zmm1 subject to writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + SQRTSD--Compute Square Root of Scalar Double-Precision Floating-Point Value. + + SQRTSD + xmm1,xmm2/m64 + F2 0F 51/r + + SSE2 + + Computes square root of the low double-precision floatingpoint value in xmm2/m64 and stores the results in xmm1. + + + VSQRTSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 51/r + + AVX + + Computes square root of the low double-precision floatingpoint value in xmm3/m64 and stores the results in xmm1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. + + + VSQRTSD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.F2.0F.W1 51/r + + AVX512F + + Computes square root of the low double-precision floatingpoint value in xmm3/m64 and stores the results in xmm1 under writemask k1. Also, upper double-precision floatingpoint value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SQRTSS--Compute Square Root of Scalar Single-Precision Value. + + SQRTSS + xmm1,xmm2/m32 + F3 0F 51 /r + + SSE + + Computes square root of the low single-precision floating-point value in xmm2/m32 and stores the results in xmm1. + + + VSQRTSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 51 /r + + AVX + + Computes square root of the low single-precision floating-point value in xmm3/m32 and stores the results in xmm1. Also, upper single-precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. + + + VSQRTSS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.NDS.LIG.F3.0F.W0 51 /r + + AVX512F + + Computes square root of the low single-precision floating-point value in xmm3/m32 and stores the results in xmm1 under writemask k1. 
Also, upper single-precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPTERNLOGD/VPTERNLOGQ--Bitwise Ternary Logic. + + VPTERNLOGD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.DDS.128.66.0F3A.W0 25 /r ib + + AVX512VL + AVX512F + + Bitwise ternary logic taking xmm1, xmm2 and xmm3/m128/m32bcst as source operands and writing the result to xmm1 under writemask k1 with dword granularity. The immediate value determines the specific binary function being implemented. + + + VPTERNLOGD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.DDS.256.66.0F3A.W0 25 /r ib + + AVX512VL + AVX512F + + Bitwise ternary logic taking ymm1, ymm2 and ymm3/m256/m32bcst as source operands and writing the result to ymm1 under writemask k1 with dword granularity. The immediate value determines the specific binary function being implemented. + + + VPTERNLOGD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst,imm8 + EVEX.DDS.512.66.0F3A.W0 25 /r ib + + AVX512F + + Bitwise ternary logic taking zmm1, zmm2 and zmm3/m512/m32bcst as source operands and writing the result to zmm1 under writemask k1 with dword granularity. The immediate value determines the specific binary function being implemented. + + + VPTERNLOGQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.DDS.128.66.0F3A.W1 25 /r ib + + AVX512VL + AVX512F + + Bitwise ternary logic taking xmm1, xmm2 and xmm3/m128/m64bcst as source operands and writing the result to xmm1 under writemask k1 with qword granularity. The immediate value determines the specific binary function being implemented. + + + VPTERNLOGQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.DDS.256.66.0F3A.W1 25 /r ib + + AVX512VL + AVX512F + + Bitwise ternary logic taking ymm1, ymm2 and ymm3/m256/m64bcst as source operands and writing the result to ymm1 under writemask k1 with qword granularity. The immediate value determines the specific binary function being implemented. + + + VPTERNLOGQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst,imm8 + EVEX.DDS.512.66.0F3A.W1 25 /r ib + + AVX512F + + Bitwise ternary logic taking zmm1, zmm2 and zmm3/m512/m64bcst as source operands and writing the result to zmm1 under writemask k1 with qword granularity. The immediate value determines the specific binary function being implemented. + + + ModRM:reg(r,w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VPTESTMB/VPTESTMW/VPTESTMD/VPTESTMQ--Logical AND and Set Mask. + + VPTESTMB + k2 {k1},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W0 26 /r + + AVX512VL + AVX512BW + + Bitwise AND of packed byte integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMB + k2 {k1},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W0 26 /r + + AVX512VL + AVX512BW + + Bitwise AND of packed byte integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMB + k2 {k1},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W0 26 /r + + AVX512BW + + Bitwise AND of packed byte integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. 
+ + + VPTESTMW + k2 {k1},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 26 /r + + AVX512VL + AVX512BW + + Bitwise AND of packed word integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMW + k2 {k1},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 26 /r + + AVX512VL + AVX512BW + + Bitwise AND of packed word integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMW + k2 {k1},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 26 /r + + AVX512BW + + Bitwise AND of packed word integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMD + k2 {k1},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 27 /r + + AVX512VL + AVX512F + + Bitwise AND of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMD + k2 {k1},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 27 /r + + AVX512VL + AVX512F + + Bitwise AND of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMD + k2 {k1},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 27 /r + + AVX512F + + Bitwise AND of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMQ + k2 {k1},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 27 /r + + AVX512VL + AVX512F + + Bitwise AND of packed quadword integers in xmm2 and xmm3/m128/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMQ + k2 {k1},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 27 /r + + AVX512VL + AVX512F + + Bitwise AND of packed quadword integers in ymm2 and ymm3/m256/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + VPTESTMQ + k2 {k1},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 27 /r + + AVX512F + + Bitwise AND of packed quadword integers in zmm2 and zmm3/m512/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VPSRAVW/VPSRAVD/VPSRAVQ--Variable Bit Shift Right Arithmetic. + + VPSRAVD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 46 /r + + AVX2 + + Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in sign bits. + + + VPSRAVD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 46 /r + + AVX2 + + Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in sign bits. + + + VPSRAVW + xmm1 {k1}{z},xmm2,xmm3/m128 + EVEX.NDS.128.66.0F38.W1 11 /r + + AVX512VL + AVX512BW + + Shift words in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in sign bits using writemask k1. + + + VPSRAVW + ymm1 {k1}{z},ymm2,ymm3/m256 + EVEX.NDS.256.66.0F38.W1 11 /r + + AVX512VL + AVX512BW + + Shift words in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in sign bits using writemask k1. 
+ + + VPSRAVW + zmm1 {k1}{z},zmm2,zmm3/m512 + EVEX.NDS.512.66.0F38.W1 11 /r + + AVX512BW + + Shift words in zmm2 right by amount specified in the corresponding element of zmm3/m512 while shifting in sign bits using writemask k1. + + + VPSRAVD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 46 /r + + AVX512VL + AVX512F + + Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m32bcst while shifting in sign bits using writemask k1. + + + VPSRAVD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 46 /r + + AVX512VL + AVX512F + + Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m32bcst while shifting in sign bits using writemask k1. + + + VPSRAVD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F38.W0 46 /r + + AVX512F + + Shift doublewords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m32bcst while shifting in sign bits using writemask k1. + + + VPSRAVQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 46 /r + + AVX512VL + AVX512F + + Shift quadwords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m64bcst while shifting in sign bits using writemask k1. + + + VPSRAVQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 46 /r + + AVX512VL + AVX512F + + Shift quadwords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m64bcst while shifting in sign bits using writemask k1. + + + VPSRAVQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F38.W1 46 /r + + AVX512F + + Shift quadwords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m64bcst while shifting in sign bits using writemask k1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PXOR/PXORD/PXORQ--Exclusive Or. + + PXOR + xmm1,xmm2/m128 + 66 0F EF /r + + SSE2 + + Bitwise XOR of xmm2/m128 and xmm1. + + + VPXOR + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EF /r + + AVX + + Bitwise XOR of xmm3/m128 and xmm2. + + + VPXOR + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EF /r + + AVX2 + + Bitwise XOR of ymm3/m256 and ymm2. + + + VPXORD + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F.W0 EF /r + + AVX512VL + AVX512F + + Bitwise XOR of packed doubleword integers in xmm2 and xmm3/m128 using writemask k1. + + + VPXORD + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F.W0 EF /r + + AVX512VL + AVX512F + + Bitwise XOR of packed doubleword integers in ymm2 and ymm3/m256 using writemask k1. + + + VPXORD + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.66.0F.W0 EF /r + + AVX512F + + Bitwise XOR of packed doubleword integers in zmm2 and zmm3/m512/m32bcst using writemask k1. + + + VPXORQ + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 EF /r + + AVX512VL + AVX512F + + Bitwise XOR of packed quadword integers in xmm2 and xmm3/m128 using writemask k1. + + + VPXORQ + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 EF /r + + AVX512VL + AVX512F + + Bitwise XOR of packed quadword integers in ymm2 and ymm3/m256 using writemask k1. + + + VPXORQ + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 EF /r + + AVX512F + + Bitwise XOR of packed quadword integers in zmm2 and zmm3/m512/m64bcst using writemask k1. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRANGEPD--Range Restriction Calculation For Packed Pairs of Float64 Values. + + VRANGEPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8 + EVEX.NDS.128.66.0F3A.W1 50 /r ib + + AVX512VL + AVX512DQ + + Calculate two RANGE operation output value from 2 pairs of double-precision floating-point values in xmm2 and xmm3/m128/m32bcst, store the results to xmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + VRANGEPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8 + EVEX.NDS.256.66.0F3A.W1 50 /r ib + + AVX512VL + AVX512DQ + + Calculate four RANGE operation output value from 4pairs of double-precision floating-point values in ymm2 and ymm3/m256/m32bcst, store the results to ymm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + VRANGEPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae},imm8 + EVEX.NDS.512.66.0F3A.W1 50 /r ib + + AVX512DQ + + Calculate eight RANGE operation output value from 8 pairs of double-precision floating-point values in zmm2 and zmm3/m512/m32bcst, store the results to zmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VRANGEPS--Range Restriction Calculation For Packed Pairs of Float32 Values. + + VRANGEPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8 + EVEX.NDS.128.66.0F3A.W0 50 /r ib + + AVX512VL + AVX512DQ + + Calculate four RANGE operation output value from 4 pairs of single-precision floating-point values in xmm2 and xmm3/m128/m32bcst, store the results to xmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + VRANGEPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8 + EVEX.NDS.256.66.0F3A.W0 50 /r ib + + AVX512VL + AVX512DQ + + Calculate eight RANGE operation output value from 8 pairs of single-precision floating-point values in ymm2 and ymm3/m256/m32bcst, store the results to ymm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + VRANGEPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae},imm8 + EVEX.NDS.512.66.0F3A.W0 50 /r ib + + AVX512DQ + + Calculate 16 RANGE operation output value from 16 pairs of single-precision floating-point values in zmm2 and zmm3/m512/m32bcst, store the results to zmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VRANGESD--Range Restriction Calculation From a pair of Scalar Float64 Values. + + VRANGESD + xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W1 51 /r + + AVX512DQ + + Calculate a RANGE operation output value from 2 doubleprecision floating-point values in xmm2 and xmm3/m64, store the output to xmm1 under writemask. Imm8 specifies the comparison and sign of the range operation. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VRANGESS--Range Restriction Calculation From a Pair of Scalar Float32 Values. + + VRANGESS + xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W0 51 /r + + AVX512DQ + + Calculate a RANGE operation output value from 2 singleprecision floating-point values in xmm2 and xmm3/m32, store the output to xmm1 under writemask. Imm8 specifies the comparison and sign of the range operation. 
+ + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRCP14PD--Compute Approximate Reciprocals of Packed Float64 Values. + + VRCP14PD + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 4C /r + + AVX512VL + AVX512F + + Computes the approximate reciprocals of the packed doubleprecision floating-point values in xmm2/m128/m64bcst and stores the results in xmm1. Under writemask. + + + VRCP14PD + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 4C /r + + AVX512VL + AVX512F + + Computes the approximate reciprocals of the packed doubleprecision floating-point values in ymm2/m256/m64bcst and stores the results in ymm1. Under writemask. + + + VRCP14PD + zmm1 {k1}{z},zmm2/m512/m64bcst + EVEX.512.66.0F38.W1 4C /r + + AVX512F + + Computes the approximate reciprocals of the packed doubleprecision floating-point values in zmm2/m512/m64bcst and stores the results in zmm1. Under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRCP14SD--Compute Approximate Reciprocal of Scalar Float64 Value. + + T1S + VRCP14SD xmm1 {k1}{z},xmm2,xmm3/m64 + EVEX.NDS.LIG.66.0F38.W1 4D /r + + AVX512F + + Computes the approximate reciprocal of the scalar doubleprecision floating-point value in xmm3/m64 and stores the result in xmm1 using writemask k1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRCP14PS--Compute Approximate Reciprocals of Packed Float32 Values. + + VRCP14PS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 4C /r + + AVX512VL + AVX512F + + Computes the approximate reciprocals of the packed singleprecision floating-point values in xmm2/m128/m32bcst and stores the results in xmm1. Under writemask. + + + VRCP14PS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 4C /r + + AVX512VL + AVX512F + + Computes the approximate reciprocals of the packed singleprecision floating-point values in ymm2/m256/m32bcst and stores the results in ymm1. Under writemask. + + + VRCP14PS + zmm1 {k1}{z},zmm2/m512/m32bcst + EVEX.512.66.0F38.W0 4C /r + + AVX512F + + Computes the approximate reciprocals of the packed singleprecision floating-point values in zmm2/m512/m32bcst and stores the results in zmm1. Under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRCP14SS--Compute Approximate Reciprocal of Scalar Float32 Value. + + VRCP14SS + xmm1 {k1}{z},xmm2,xmm3/m32 + EVEX.NDS.LIG.66.0F38.W0 4D /r + + AVX512F + + Computes the approximate reciprocal of the scalar singleprecision floating-point value in xmm3/m32 and stores the results in xmm1 using writemask k1. Also, upper doubleprecision floating-point value (bits[127:32]) from xmm2 is copied to xmm1[127:32]. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VREDUCEPD--Perform Reduction Transformation on Packed Float64 Values. + + VREDUCEPD + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.128.66.0F3A.W1 56 /r ib + + AVX512VL + AVX512DQ + + Perform reduction transformation on packed double-precision floating point values in xmm2/m128/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register under writemask k1. + + + VREDUCEPD + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 56 /r ib + + AVX512VL + AVX512DQ + + Perform reduction transformation on packed double-precision floating point values in ymm2/m256/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register under writemask k1. 
+ + + VREDUCEPD + zmm1 {k1}{z},zmm2/m512/m64bcst{sae},imm8 + EVEX.512.66.0F3A.W1 56 /r ib + + AVX512DQ + + Perform reduction transformation on double-precision floating point values in zmm2/m512/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register under writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VREDUCESD--Perform a Reduction Transformation on a Scalar Float64 Value. + + VREDUCESD + xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W1 57 /r + + AVX512DQ + + Perform a reduction transformation on a scalar double-precision floating point value in xmm3/m64 by subtracting a number of fraction bits specified by the imm8 field. Also, upper double precision floating-point value (bits[127:64]) from xmm2 are copied to xmm1[127:64]. Stores the result in xmm1 register. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VREDUCEPS--Perform Reduction Transformation on Packed Float32 Values. + + VREDUCEPS + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F3A.W0 56 /r ib + + AVX512VL + AVX512DQ + + Perform reduction transformation on packed single-precision floating point values in xmm2/m128/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register under writemask k1. + + + VREDUCEPS + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F3A.W0 56 /r ib + + AVX512VL + AVX512DQ + + Perform reduction transformation on packed single-precision floating point values in ymm2/m256/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register under writemask k1. + + + VREDUCEPS + zmm1 {k1}{z},zmm2/m512/m32bcst{sae},imm8 + EVEX.512.66.0F3A.W0 56 /r ib + + AVX512DQ + + Perform reduction transformation on packed single-precision floating point values in zmm2/m512/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register under writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VREDUCESS--Perform a Reduction Transformation on a Scalar Float32 Value. + + VREDUCESS + xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W0 57 /r /ib + + AVX512DQ + + Perform a reduction transformation on a scalar single-precision floating point value in xmm3/m32 by subtracting a number of fraction bits specified by the imm8 field. Also, upper single precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. Stores the result in xmm1 register. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRNDSCALEPD--Round Packed Float64 Values To Include A Given Number Of Fraction Bits. + + VRNDSCALEPD + xmm1 {k1}{z},xmm2/m128/m64bcst,imm8 + EVEX.128.66.0F3A.W1 09 /r ib + + AVX512VL + AVX512F + + Rounds packed double-precision floating point values in xmm2/m128/m64bcst to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register. Under writemask. + + + VRNDSCALEPD + ymm1 {k1}{z},ymm2/m256/m64bcst,imm8 + EVEX.256.66.0F3A.W1 09 /r ib + + AVX512VL + AVX512F + + Rounds packed double-precision floating point values in ymm2/m256/m64bcst to a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register. Under writemask. + + + VRNDSCALEPD + zmm1 {k1}{z},zmm2/m512/m64bcst{sae},imm8 + EVEX.512.66.0F3A.W1 09 /r ib + + AVX512F + + Rounds packed double-precision floating-point values in zmm2/m512/m64bcst to a number of fraction bits specified by the imm8 field. 
Stores the result in zmm1 register using writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VRNDSCALESD--Round Scalar Float64 Value To Include A Given Number Of Fraction Bits. + + VRNDSCALESD + xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W1 0B /r ib + + AVX512F + + Rounds scalar double-precision floating-point value in xmm3/m64 to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + Imm8 + + + + VRNDSCALEPS--Round Packed Float32 Values To Include A Given Number Of Fraction Bits. + + VRNDSCALEPS + xmm1 {k1}{z},xmm2/m128/m32bcst,imm8 + EVEX.128.66.0F3A.W0 08 /r ib + + AVX512VL + AVX512F + + Rounds packed single-precision floating point values in xmm2/m128/m32bcst to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register. Under writemask. + + + VRNDSCALEPS + ymm1 {k1}{z},ymm2/m256/m32bcst,imm8 + EVEX.256.66.0F3A.W0 08 /r ib + + AVX512VL + AVX512F + + Rounds packed single-precision floating point values in ymm2/m256/m32bcst to a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register. Under writemask. + + + VRNDSCALEPS + zmm1 {k1}{z},zmm2/m512/m32bcst{sae},imm8 + EVEX.512.66.0F3A.W0 08 /r ib + + AVX512F + + Rounds packed single-precision floating-point values in zmm2/m512/m32bcst to a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register using writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VRNDSCALESS--Round Scalar Float32 Value To Include A Given Number Of Fraction Bits. + + VRNDSCALESS + xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8 + EVEX.NDS.LIG.66.0F3A.W0 0A /r ib + + AVX512F + + Rounds scalar single-precision floating-point value in xmm3/m32 to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register under writemask. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRSQRT14PD--Compute Approximate Reciprocals of Square Roots of Packed Float64 Values. + + VRSQRT14PD + xmm1 {k1}{z},xmm2/m128/m64bcst + EVEX.128.66.0F38.W1 4E /r + + AVX512VL + AVX512F + + Computes the approximate reciprocal square roots of the packed double-precision floating-point values in xmm2/m128/m64bcst and stores the results in xmm1. Under writemask. + + + VRSQRT14PD + ymm1 {k1}{z},ymm2/m256/m64bcst + EVEX.256.66.0F38.W1 4E /r + + AVX512VL + AVX512F + + Computes the approximate reciprocal square roots of the packed double-precision floating-point values in ymm2/m256/m64bcst and stores the results in ymm1. Under writemask. + + + VRSQRT14PD + zmm1 {k1}{z},zmm2/m512/m64bcst + EVEX.512.66.0F38.W1 4E /r + + AVX512F + + Computes the approximate reciprocal square roots of the packed double-precision floating-point values in zmm2/m512/m64bcst and stores the results in zmm1 under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRSQRT14SD--Compute Approximate Reciprocal of Square Root of Scalar Float64 Value. + + VRSQRT14SD + xmm1 {k1}{z},xmm2,xmm3/m64 + EVEX.NDS.LIG.66.0F38.W1 4F /r + + AVX512F + + Computes the approximate reciprocal square root of the scalar double-precision floating-point value in xmm3/m64 and stores the result in the low quadword element of xmm1 using writemask k1. Bits[127:64] of xmm2 is copied to xmm1[127:64]. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRSQRT14PS--Compute Approximate Reciprocals of Square Roots of Packed Float32 Values. 
+ + VRSQRT14PS + xmm1 {k1}{z},xmm2/m128/m32bcst + EVEX.128.66.0F38.W0 4E /r + + AVX512VL + AVX512F + + Computes the approximate reciprocal square roots of the packed single-precision floating-point values in xmm2/m128/m32bcst and stores the results in xmm1. Under writemask. + + + VRSQRT14PS + ymm1 {k1}{z},ymm2/m256/m32bcst + EVEX.256.66.0F38.W0 4E /r + + AVX512VL + AVX512F + + Computes the approximate reciprocal square roots of the packed single-precision floating-point values in ymm2/m256/m32bcst and stores the results in ymm1. Under writemask. + + + VRSQRT14PS + zmm1 {k1}{z},zmm2/m512/m32bcst + EVEX.512.66.0F38.W0 4E /r + + AVX512F + + Computes the approximate reciprocal square roots of the packed single-precision floating-point values in zmm2/m512/m32bcst and stores the results in zmm1. Under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRSQRT14SS--Compute Approximate Reciprocal of Square Root of Scalar Float32 Value. + + VRSQRT14SS + xmm1 {k1}{z},xmm2,xmm3/m32 + EVEX.NDS.LIG.66.0F38.W0 4F /r + + AVX512F + + Computes the approximate reciprocal square root of the scalar single-precision floating-point value in xmm3/m32 and stores the result in the low doubleword element of xmm1 using writemask k1. Bits[127:32] of xmm2 is copied to xmm1[127:32]. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VSCALEFPD--Scale Packed Float64 Values With Float64 Values. + + VSCALEFPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F38.W1 2C /r + + AVX512VL + AVX512F + + Scale the packed double-precision floating-point values in xmm2 using values from xmm3/m128/m64bcst. Under writemask k1. + + + VSCALEFPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F38.W1 2C /r + + AVX512VL + AVX512F + + Scale the packed double-precision floating-point values in ymm2 using values from ymm3/m256/m64bcst. Under writemask k1. + + + VSCALEFPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F38.W1 2C /r + + AVX512F + + Scale the packed double-precision floating-point values in zmm2 using values from zmm3/m512/m64bcst. Under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VSCALEFSD--Scale Scalar Float64 Values With Float64 Values. + + VSCALEFSD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.66.0F38.W1 2D /r + + AVX512F + + Scale the scalar double-precision floating-point values in xmm2 using the value from xmm3/m64. Under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VSCALEFPS--Scale Packed Float32 Values With Float32 Values. + + VSCALEFPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.66.0F38.W0 2C /r + + AVX512VL + AVX512F + + Scale the packed single-precision floating-point values in xmm2 using values from xmm3/m128/m32bcst. Under writemask k1. + + + VSCALEFPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.66.0F38.W0 2C /r + + AVX512VL + AVX512F + + Scale the packed single-precision values in ymm2 using floating point values from ymm3/m256/m32bcst. Under writemask k1. + + + VSCALEFPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.66.0F38.W0 2C /r + + AVX512F + + Scale the packed single-precision floating-point values in zmm2 using floating-point values from zmm3/m512/m32bcst. Under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VSCALEFSS--Scale Scalar Float32 Value With Float32 Value. 
+ + VSCALEFSS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.NDS.LIG.66.0F38.W0 2D /r + + AVX512F + + Scale the scalar single-precision floating-point value in xmm2 using floating-point value from xmm3/m32. Under writemask k1. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VSCATTERDPS/VSCATTERDPD/VSCATTERQPS/VSCATTERQPD--Scatter Packed Single, Packed Double with Signed Dword and Qword Indices. + + VSCATTERDPS + vm32x {k1},xmm1 + EVEX.128.66.0F38.W0 A2 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERDPS + vm32y {k1},ymm1 + EVEX.256.66.0F38.W0 A2 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERDPS + vm32z {k1},zmm1 + EVEX.512.66.0F38.W0 A2 /vsib + + AVX512F + + Using signed dword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERDPD + vm32x {k1},xmm1 + EVEX.128.66.0F38.W1 A2 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter double-precision floating-point values to memory using writemask k1. + + + VSCATTERDPD + vm32x {k1},ymm1 + EVEX.256.66.0F38.W1 A2 /vsib + + AVX512VL + AVX512F + + Using signed dword indices, scatter double-precision floating-point values to memory using writemask k1. + + + VSCATTERDPD + vm32y {k1},zmm1 + EVEX.512.66.0F38.W1 A2 /vsib + + AVX512F + + Using signed dword indices, scatter double-precision floating-point values to memory using writemask k1. + + + VSCATTERQPS + vm64x {k1},xmm1 + EVEX.128.66.0F38.W0 A3 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERQPS + vm64y {k1},xmm1 + EVEX.256.66.0F38.W0 A3 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERQPS + vm64z {k1},ymm1 + EVEX.512.66.0F38.W0 A3 /vsib + + AVX512F + + Using signed qword indices, scatter single-precision floating-point values to memory using writemask k1. + + + VSCATTERQPD + vm64x {k1},xmm1 + EVEX.128.66.0F38.W1 A3 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter double-precision floating-point values to memory using writemask k1. + + + VSCATTERQPD + vm64y {k1},ymm1 + EVEX.256.66.0F38.W1 A3 /vsib + + AVX512VL + AVX512F + + Using signed qword indices, scatter double-precision floating-point values to memory using writemask k1. + + + VSCATTERQPD + vm64z {k1},zmm1 + EVEX.512.66.0F38.W1 A3 /vsib + + AVX512F + + Using signed qword indices, scatter double-precision floating-point values to memory using writemask k1. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + ModRM:reg(r) + NA + NA + + + + SUBPD--Subtract Packed Double-Precision Floating-Point Values. + + SUBPD + xmm1,xmm2/m128 + 66 0F 5C /r + + SSE2 + + Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result in xmm1. + + + VSUBPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5C /r + + AVX + + Subtract packed double-precision floating-point values in xmm3/mem from xmm2 and store result in xmm1. + + + VSUBPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5C /r + + AVX + + Subtract packed double-precision floating-point values in ymm3/mem from ymm2 and store result in ymm1. 
+ + + VSUBPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 5C /r + + AVX512VL + AVX512F + + Subtract packed double-precision floating-point values from xmm3/m128/m64bcst to xmm2 and store result in xmm1 with writemask k1. + + + VSUBPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 5C /r + + AVX512VL + AVX512F + + Subtract packed double-precision floating-point values from ymm3/m256/m64bcst to ymm2 and store result in ymm1 with writemask k1. + + + VSUBPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er} + EVEX.NDS.512.66.0F.W1 5C /r + + AVX512F + + Subtract packed double-precision floating-point values from zmm3/m512/m64bcst to zmm2 and store result in zmm1 with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SUBPS--Subtract Packed Single-Precision Floating-Point Values. + + SUBPS + xmm1,xmm2/m128 + 0F 5C /r + + SSE + + Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result in xmm1. + + + VSUBPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5C /r + + AVX + + Subtract packed single-precision floating-point values in xmm3/mem from xmm2 and stores result in xmm1. + + + VSUBPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5C /r + + AVX + + Subtract packed single-precision floating-point values in ymm3/mem from ymm2 and stores result in ymm1. + + + VSUBPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 5C /r + + AVX512VL + AVX512F + + Subtract packed single-precision floating-point values from xmm3/m128/m32bcst to xmm2 and stores result in xmm1 with writemask k1. + + + VSUBPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 5C /r + + AVX512VL + AVX512F + + Subtract packed single-precision floating-point values from ymm3/m256/m32bcst to ymm2 and stores result in ymm1 with writemask k1. + + + VSUBPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er} + EVEX.NDS.512.0F.W0 5C /r + + AVX512F + + Subtract packed single-precision floating-point values in zmm3/m512/m32bcst from zmm2 and stores result in zmm1 with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SUBSD--Subtract Scalar Double-Precision Floating-Point Value. + + SUBSD + xmm1,xmm2/m64 + F2 0F 5C /r + + SSE2 + + Subtract the low double-precision floating-point value in xmm2/m64 from xmm1 and store the result in xmm1. + + + VSUBSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.128.F2.0F.WIG 5C /r + + AVX + + Subtract the low double-precision floating-point value in xmm3/m64 from xmm2 and store the result in xmm1. + + + VSUBSD + xmm1 {k1}{z},xmm2,xmm3/m64{er} + EVEX.NDS.LIG.F2.0F.W1 5C /r + + AVX512F + + Subtract the low double-precision floating-point value in xmm3/m64 from xmm2 and store the result in xmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SUBSS--Subtract Scalar Single-Precision Floating-Point Value. + + SUBSS + xmm1,xmm2/m32 + F3 0F 5C /r + + SSE + + Subtract the low single-precision floating-point value in xmm2/m32 from xmm1 and store the result in xmm1. + + + VSUBSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.128.F3.0F.WIG 5C /r + + AVX + + Subtract the low single-precision floating-point value in xmm3/m32 from xmm2 and store the result in xmm1. 
+ + + VSUBSS + xmm1 {k1}{z},xmm2,xmm3/m32{er} + EVEX.NDS.LIG.F3.0F.W0 5C /r + + AVX512F + + Subtract the low single-precision floating-point value in xmm3/m32 from xmm2 and store the result in xmm1 under writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UCOMISD--Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS. + + UCOMISD + xmm1,xmm2/m64 + 66 0F 2E /r + + SSE2 + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + VUCOMISD + xmm1,xmm2/m64 + VEX.128.66.0F.WIG 2E /r + + AVX + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + VUCOMISD + xmm1,xmm2/m64{sae} + EVEX.LIG.66.0F.W1 2E /r + + AVX512F + + Compare low double-precision floating-point values in xmm1 and xmm2/m64 and set the EFLAGS flags accordingly. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + UCOMISS--Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS. + + UCOMISS + xmm1,xmm2/m32 + 0F 2E /r + + SSE + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + VUCOMISS + xmm1,xmm2/m32 + VEX.128.0F.WIG 2E /r + + AVX + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + VUCOMISS + xmm1,xmm2/m32{sae} + EVEX.LIG.0F.W0 2E /r + + AVX512F + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + UNPCKHPD--Unpack and Interleave High Packed Double-Precision Floating-Point Values. + + UNPCKHPD + xmm1,xmm2/m128 + 66 0F 15 /r + + SSE2 + + Unpacks and Interleaves double-precision floating-point values from high quadwords of xmm1 and xmm2/m128. + + + VUNPCKHPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves double-precision floating-point values from high quadwords of xmm2 and xmm3/m128. + + + VUNPCKHPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves double-precision floating-point values from high quadwords of ymm2 and ymm3/m256. + + + VUNPCKHPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 15 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves double precision floating-point values from high quadwords of xmm2 and xmm3/m128/m64bcst subject to writemask k1. + + + VUNPCKHPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 15 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves double precision floating-point values from high quadwords of ymm2 and ymm3/m256/m64bcst subject to writemask k1. + + + VUNPCKHPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 15 /r + + AVX512F + + Unpacks and Interleaves double-precision floating-point values from high quadwords of zmm2 and zmm3/m512/m64bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UNPCKHPS--Unpack and Interleave High Packed Single-Precision Floating-Point Values. 
+ + UNPCKHPS + xmm1,xmm2/m128 + 0F 15 /r + + SSE + + Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm1 and xmm2/m128. + + + VUNPCKHPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm2 and xmm3/m128. + + + VUNPCKHPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves single-precision floating-point values from high quadwords of ymm2 and ymm3/m256. + + + VUNPCKHPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 15 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm2 and xmm3/m128/m32bcst and write result to xmm1 subject to writemask k1. + + + VUNPCKHPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 15 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves single-precision floating-point values from high quadwords of ymm2 and ymm3/m256/m32bcst and write result to ymm1 subject to writemask k1. + + + VUNPCKHPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 15 /r + + AVX512F + + Unpacks and Interleaves single-precision floating-point values from high quadwords of zmm2 and zmm3/m512/m32bcst and write result to zmm1 subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UNPCKLPD--Unpack and Interleave Low Packed Double-Precision Floating-Point Values. + + UNPCKLPD + xmm1,xmm2/m128 + 66 0F 14 /r + + SSE2 + + Unpacks and Interleaves double-precision floating-point values from low quadwords of xmm1 and xmm2/m128. + + + VUNPCKLPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 14 /r + + AVX + + Unpacks and Interleaves double-precision floating-point values from low quadwords of xmm2 and xmm3/m128. + + + VUNPCKLPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 14 /r + + AVX + + Unpacks and Interleaves double-precision floating-point values from low quadwords of ymm2 and ymm3/m256. + + + VUNPCKLPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 14 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves double precision floating-point values from low quadwords of xmm2 and xmm3/m128/m64bcst subject to write mask k1. + + + VUNPCKLPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 14 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves double precision floating-point values from low quadwords of ymm2 and ymm3/m256/m64bcst subject to write mask k1. + + + VUNPCKLPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 14 /r + + AVX512F + + Unpacks and Interleaves double-precision floating-point values from low quadwords of zmm2 and zmm3/m512/m64bcst subject to write mask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UNPCKLPS--Unpack and Interleave Low Packed Single-Precision Floating-Point Values. + + UNPCKLPS + xmm1,xmm2/m128 + 0F 14 /r + + SSE + + Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm1 and xmm2/m128. + + + VUNPCKLPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 14 /r + + AVX + + Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm2 and xmm3/m128. 
+ + + ymm1,ymm2,ymm3/m256 + void + VEX.NDS.256.0F.WIG 14 /r VUNPCKLPS + + AVX + + Unpacks and Interleaves single-precision floating-point values from low quadwords of ymm2 and ymm3/m256. + + + VUNPCKLPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 14 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm2 and xmm3/mem and write result to xmm1 subject to write mask k1. + + + VUNPCKLPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 14 /r + + AVX512VL + AVX512F + + Unpacks and Interleaves single-precision floating-point values from low quadwords of ymm2 and ymm3/mem and write result to ymm1 subject to write mask k1. + + + VUNPCKLPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 14 /r + + AVX512F + + Unpacks and Interleaves single-precision floating-point values from low quadwords of zmm2 and zmm3/m512/m32bcst and write result to zmm1 subject to write mask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + XORPD--Bitwise Logical XOR of Packed Double Precision Floating-Point Values. + + XORPD + xmm1,xmm2/m128 + 66 0F 57/r + + SSE2 + + Return the bitwise logical XOR of packed doubleprecision floating-point values in xmm1 and xmm2/mem. + + + VXORPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed doubleprecision floating-point values in xmm2 and xmm3/mem. + + + VXORPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed doubleprecision floating-point values in ymm2 and ymm3/mem. + + + VXORPD + xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst + EVEX.NDS.128.66.0F.W1 57 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical XOR of packed doubleprecision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1. + + + VXORPD + ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst + EVEX.NDS.256.66.0F.W1 57 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical XOR of packed doubleprecision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1. + + + VXORPD + zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst + EVEX.NDS.512.66.0F.W1 57 /r + + AVX512DQ + + Return the bitwise logical XOR of packed doubleprecision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + XORPS--Bitwise Logical XOR of Packed Single Precision Floating-Point Values. + + XORPS + xmm1,xmm2/m128 + 0F 57 /r + + SSE + + Return the bitwise logical XOR of packed singleprecision floating-point values in xmm1 and xmm2/mem. + + + VXORPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed singleprecision floating-point values in xmm2 and xmm3/mem. + + + VXORPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed singleprecision floating-point values in ymm2 and ymm3/mem. + + + VXORPS + xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst + EVEX.NDS.128.0F.W0 57 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical XOR of packed singleprecision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1. 
+ + + VXORPS + ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst + EVEX.NDS.256.0F.W0 57 /r + + AVX512VL + AVX512DQ + + Return the bitwise logical XOR of packed singleprecision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1. + + + VXORPS + zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst + EVEX.NDS.512.0F.W0 57 /r + + AVX512DQ + + Return the bitwise logical XOR of packed singleprecision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + KADDW/KADDB/KADDQ/KADDD--ADD Two Masks. + + KADDW + k1,k2,k3 + VEX.L1.0F.W0 4A /r + + AVX512DQ + + Add 16 bits masks in k2 and k3 and place result in k1. + + + KADDB + k1,k2,k3 + VEX.L1.66.0F.W0 4A /r + + AVX512DQ + + Add 8 bits masks in k2 and k3 and place result in k1. + + + KADDQ + k1,k2,k3 + VEX.L1.0F.W1 4A /r + + AVX512BW + + Add 64 bits masks in k2 and k3 and place result in k1. + + + KADDD + k1,k2,k3 + VEX.L1.66.0F.W1 4A /r + + AVX512BW + + Add 32 bits masks in k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KANDW/KANDB/KANDQ/KANDD--Bitwise Logical AND Masks. + + KANDW + k1,k2,k3 + VEX.NDS.L1.0F.W0 41 /r + + AVX512F + + Bitwise AND 16 bits masks k2 and k3 and place result in k1. + + + KANDB + k1,k2,k3 + VEX.L1.66.0F.W0 41 /r + + AVX512DQ + + Bitwise AND 8 bits masks k2 and k3 and place result in k1. + + + KANDQ + k1,k2,k3 + VEX.L1.0F.W1 41 /r + + AVX512BW + + Bitwise AND 64 bits masks k2 and k3 and place result in k1. + + + KANDD + k1,k2,k3 + VEX.L1.66.0F.W1 41 /r + + AVX512BW + + Bitwise AND 32 bits masks k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KANDNW/KANDNB/KANDNQ/KANDND--Bitwise Logical AND NOT Masks. + + KANDNW + k1,k2,k3 + VEX.NDS.L1.0F.W0 42 /r + + AVX512F + + Bitwise AND NOT 16 bits masks k2 and k3 and place result in k1. + + + KANDNB + k1,k2,k3 + VEX.L1.66.0F.W0 42 /r + + AVX512DQ + + Bitwise AND NOT 8 bits masks k1 and k2 and place result in k1. + + + KANDNQ + k1,k2,k3 + VEX.L1.0F.W1 42 /r + + AVX512BW + + Bitwise AND NOT 64 bits masks k2 and k3 and place result in k1. + + + KANDND + k1,k2,k3 + VEX.L1.66.0F.W1 42 /r + + AVX512BW + + Bitwise AND NOT 32 bits masks k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KMOVW/KMOVB/KMOVQ/KMOVD--Move from and to Mask Registers. + + KMOVW + k1,k2/m16 + VEX.L0.0F.W0 90 /r + + AVX512F + + Move 16 bits mask from k2/m16 and store the result in k1. + + + KMOVB + k1,k2/m8 + VEX.L0.66.0F.W0 90 /r + + AVX512DQ + + Move 8 bits mask from k2/m8 and store the result in k1. + + + KMOVQ + k1,k2/m64 + VEX.L0.0F.W1 90 /r + + AVX512BW + + Move 64 bits mask from k2/m64 and store the result in k1. + + + KMOVD + k1,k2/m32 + VEX.L0.66.0F.W1 90 /r + + AVX512BW + + Move 32 bits mask from k2/m32 and store the result in k1. + + + KMOVW + m16,k1 + VEX.L0.0F.W0 91 /r + + AVX512F + + Move 16 bits mask from k1 and store the result in m16. + + + KMOVB + m8,k1 + VEX.L0.66.0F.W0 91 /r + + AVX512DQ + + Move 8 bits mask from k1 and store the result in m8. + + + KMOVQ + m64,k1 + VEX.L0.0F.W1 91 /r + + AVX512BW + + Move 64 bits mask from k1 and store the result in m64. + + + KMOVD + m32,k1 + VEX.L0.66.0F.W1 91 /r + + AVX512BW + + Move 32 bits mask from k1 and store the result in m32. 
+ + + KMOVW + k1,r32 + VEX.L0.0F.W0 92 /r + + AVX512F + + Move 16 bits mask from r32 to k1. + + + KMOVB + k1,r32 + VEX.L0.66.0F.W0 92 /r + + AVX512DQ + + Move 8 bits mask from r32 to k1. + + + KMOVQ + k1,r64 + VEX.L0.F2.0F.W1 92 /r + + AVX512BW + + Move 64 bits mask from r64 to k1. + + + KMOVD + k1,r32 + VEX.L0.F2.0F.W0 92 /r + + AVX512BW + + Move 32 bits mask from r32 to k1. + + + KMOVW + r32,k1 + VEX.L0.0F.W0 93 /r + + AVX512F + + Move 16 bits mask from k1 to r32. + + + KMOVB + r32,k1 + VEX.L0.66.0F.W0 93 /r + + AVX512DQ + + Move 8 bits mask from k1 to r32. + + + KMOVQ + r64,k1 + VEX.L0.F2.0F.W1 93 /r + + AVX512BW + + Move 64 bits mask from k1 to r64. + + + KMOVD + r32,k1 + VEX.L0.F2.0F.W0 93 /r + + AVX512BW + + Move 32 bits mask from k1 to r32. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w, ModRM:[7:6] must not be 11b) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + NA + + + + KUNPCKBW/KUNPCKWD/KUNPCKDQ--Unpack for Mask Registers. + + KUNPCKBW + k1,k2,k3 + VEX.NDS.L1.66.0F.W0 4B /r + + AVX512F + + Unpack and interleave 8 bits masks in k2 and k3 and write word result in k1. + + + KUNPCKWD + k1,k2,k3 + VEX.NDS.L1.0F.W0 4B /r + + AVX512BW + + Unpack and interleave 16 bits in k2 and k3 and write doubleword result in k1. + + + KUNPCKDQ + k1,k2,k3 + VEX.NDS.L1.0F.W1 4B /r + + AVX512BW + + Unpack and interleave 32 bits masks in k2 and k3 and write quadword result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KNOTW/KNOTB/KNOTQ/KNOTD--NOT Mask Register. + + KNOTW + k1,k2 + VEX.L0.0F.W0 44 /r + + AVX512F + + Bitwise NOT of 16 bits mask k2. + + + KNOTB + k1,k2 + VEX.L0.66.0F.W0 44 /r + + AVX512DQ + + Bitwise NOT of 8 bits mask k2. + + + KNOTQ + k1,k2 + VEX.L0.0F.W1 44 /r + + AVX512BW + + Bitwise NOT of 64 bits mask k2. + + + KNOTD + k1,k2 + VEX.L0.66.0F.W1 44 /r + + AVX512BW + + Bitwise NOT of 32 bits mask k2. + + + ModRM:reg(w) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + NA + + + + KORW/KORB/KORQ/KORD--Bitwise Logical OR Masks. + + KORW + k1,k2,k3 + VEX.NDS.L1.0F.W0 45 /r + + AVX512F + + Bitwise OR 16 bits masks k2 and k3 and place result in k1. + + + KORB + k1,k2,k3 + VEX.L1.66.0F.W0 45 /r + + AVX512DQ + + Bitwise OR 8 bits masks k2 and k3 and place result in k1. + + + KORQ + k1,k2,k3 + VEX.L1.0F.W1 45 /r + + AVX512BW + + Bitwise OR 64 bits masks k2 and k3 and place result in k1. + + + KORD + k1,k2,k3 + VEX.L1.66.0F.W1 45 /r + + AVX512BW + + Bitwise OR 32 bits masks k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KORTESTW/KORTESTB/KORTESTQ/KORTESTD--OR Masks And Set Flags. + + KORTESTW + k1,k2 + VEX.L0.0F.W0 98 /r + + AVX512F + + Bitwise OR 16 bits masks k1 and k2 and update ZF and CF accordingly. + + + KORTESTB + k1,k2 + VEX.L0.66.0F.W0 98 /r + + AVX512DQ + + Bitwise OR 8 bits masks k1 and k2 and update ZF and CF accordingly. + + + KORTESTQ + k1,k2 + VEX.L0.0F.W1 98 /r + + AVX512BW + + Bitwise OR 64 bits masks k1 and k2 and update ZF and CF accordingly. + + + KORTESTD + k1,k2 + VEX.L0.66.0F.W1 98 /r + + AVX512BW + + Bitwise OR 32 bits masks k1 and k2 and update ZF and CF accordingly. + + + ModRM:reg(w) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + NA + + + + KSHIFTLW/KSHIFTLB/KSHIFTLQ/KSHIFTLD--Shift Left Mask Registers. + + KSHIFTLW + k1,k2,imm8 + VEX.L0.66.0F3A.W1 32 /r + + AVX512F + + Shift left 16 bits in k2 by immediate and write result in k1. 
+ + + KSHIFTLB + k1,k2,imm8 + VEX.L0.66.0F3A.W0 32 /r + + AVX512DQ + + Shift left 8 bits in k2 by immediate and write result in k1. + + + KSHIFTLQ + k1,k2,imm8 + VEX.L0.66.0F3A.W1 33 /r + + AVX512BW + + Shift left 64 bits in k2 by immediate and write result in k1. + + + KSHIFTLD + k1,k2,imm8 + VEX.L0.66.0F3A.W0 33 /r + + AVX512BW + + Shift left 32 bits in k2 by immediate and write result in k1. + + + ModRM:reg(w) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + Imm8 + NA + + + + KSHIFTRW/KSHIFTRB/KSHIFTRQ/KSHIFTRD--Shift Right Mask Registers. + + KSHIFTRW + k1,k2,imm8 + VEX.L0.66.0F3A.W1 30 /r + + AVX512F + + Shift right 16 bits in k2 by immediate and write result in k1. + + + KSHIFTRB + k1,k2,imm8 + VEX.L0.66.0F3A.W0 30 /r + + AVX512DQ + + Shift right 8 bits in k2 by immediate and write result in k1. + + + KSHIFTRQ + k1,k2,imm8 + VEX.L0.66.0F3A.W1 31 /r + + AVX512BW + + Shift right 64 bits in k2 by immediate and write result in k1. + + + KSHIFTRD + k1,k2,imm8 + VEX.L0.66.0F3A.W0 31 /r + + AVX512BW + + Shift right 32 bits in k2 by immediate and write result in k1. + + + ModRM:reg(w) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + Imm8 + NA + + + + KXNORW/KXNORB/KXNORQ/KXNORD--Bitwise Logical XNOR Masks. + + KXNORW + k1,k2,k3 + VEX.NDS.L1.0F.W0 46 /r + + AVX512F + + Bitwise XNOR 16 bits masks k2 and k3 and place result in k1. + + + KXNORB + k1,k2,k3 + VEX.L1.66.0F.W0 46 /r + + AVX512DQ + + Bitwise XNOR 8 bits masks k2 and k3 and place result in k1. + + + KXNORQ + k1,k2,k3 + VEX.L1.0F.W1 46 /r + + AVX512BW + + Bitwise XNOR 64 bits masks k2 and k3 and place result in k1. + + + KXNORD + k1,k2,k3 + VEX.L1.66.0F.W1 46 /r + + AVX512BW + + Bitwise XNOR 32 bits masks k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + KTESTW/KTESTB/KTESTQ/KTESTD--Packed Bit Test Masks and Set Flags. + + RR + KTESTW k1,k2 + VEX.L0.0F.W0 99 /r + + AVX512DQ + + Set ZF and CF depending on sign bit AND and ANDN of 16 bits mask register sources. + + + RR + KTESTB k1,k2 + VEX.L0.66.0F.W0 99 /r + + AVX512DQ + + Set ZF and CF depending on sign bit AND and ANDN of 8 bits mask register sources. + + + RR + KTESTQ k1,k2 + VEX.L0.0F.W1 99 /r + + AVX512BW + + Set ZF and CF depending on sign bit AND and ANDN of 64 bits mask register sources. + + + RR + KTESTD k1,k2 + VEX.L0.66.0F.W1 99 /r + + AVX512BW + + Set ZF and CF depending on sign bit AND and ANDN of 32 bits mask register sources. + + + ModRM:reg(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + NA + + + + KXORW/KXORB/KXORQ/KXORD--Bitwise Logical XOR Masks. + + KXORW + k1,k2,k3 + VEX.NDS.L1.0F.W0 47 /r + + AVX512F + + Bitwise XOR 16 bits masks k2 and k3 and place result in k1. + + + KXORB + k1,k2,k3 + VEX.L1.66.0F.W0 47 /r + + AVX512DQ + + Bitwise XOR 8 bits masks k2 and k3 and place result in k1. + + + KXORQ + k1,k2,k3 + VEX.L1.0F.W1 47 /r + + AVX512BW + + Bitwise XOR 64 bits masks k2 and k3 and place result in k1. + + + KXORD + k1,k2,k3 + VEX.L1.66.0F.W1 47 /r + + AVX512BW + + Bitwise XOR 32 bits masks k2 and k3 and place result in k1. + + + ModRM:reg(w) + VEX.1vvv(r) + ModRM:r/m(r, ModRM:[7:6] must be 11b) + NA + + + + VEXP2PD--Approximation to the Exponential 2^x of Packed Double-Precision Floating-Point Values with Less Than 2^-23 Relative Error. 
+ + VEXP2PD + zmm1 {k1}{z},zmm2/m512/m64bcst {sae} + EVEX.512.66.0F38.W1 C8 /r + + AVX512ER + + Computes approximations to the exponential 2^x (with less than 2^-23 of maximum relative error) of the packed doubleprecision floating-point values from zmm2/m512/m64bcst and stores the floating-point result in zmm1with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + VEXP2PS--Approximation to the Exponential 2^x of Packed Single-Precision Floating-Point Values with Less Than 2^-23 Relative Error. + + VEXP2PS + zmm1 {k1}{z},zmm2/m512/m32bcst {sae} + EVEX.512.66.0F38.W0 C8 /r + + AVX512ER + + Computes approximations to the exponential 2^x (with less than 2^-23 of maximum relative error) of the packed singleprecision floating-point values from zmm2/m512/m32bcst and stores the floating-point result in zmm1with writemask k1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + VRCP28PD--Approximation to the Reciprocal of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error. + + VRCP28PD + zmm1 {k1}{z},zmm2/m512/m64bcst {sae} + EVEX.512.66.0F38.W1 CA /r + + AVX512ER + + Computes the approximate reciprocals ( < 2^-28 relative error) of the packed double-precision floating-point values in zmm2/m512/m64bcst and stores the results in zmm1. Under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRCP28SD--Approximation to the Reciprocal of Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error. + + VRCP28SD + xmm1 {k1}{z},xmm2,xmm3/m64 {sae} + EVEX.NDS.LIG.66.0F38.W1 CB /r + + AVX512ER + + Computes the approximate reciprocal ( < 2^-28 relative error) of the scalar double-precision floating-point value in xmm3/m64 and stores the results in xmm1. Under writemask. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VRCP28PS--Approximation to the Reciprocal of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error. + + VRCP28PS + zmm1 {k1}{z},zmm2/m512/m32bcst {sae} + EVEX.512.66.0F38.W0 CA /r + + AVX512ER + + Computes the approximate reciprocals ( < 2^-28 relative error) of the packed single-precision floating-point values in zmm2/m512/m32bcst and stores the results in zmm1. Under writemask. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRCP28SS--Approximation to the Reciprocal of Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error. + + VRCP28SS + xmm1 {k1}{z},xmm2,xmm3/m32 {sae} + EVEX.NDS.LIG.66.0F38.W0 CB /r + + AVX512ER + + Computes the approximate reciprocal ( < 2^-28 relative error) of the scalar single-precision floating-point value in xmm3/m32 and stores the results in xmm1. Under writemask. Also, upper 3 single-precision floating-point values (bits[127:32]) from xmm2 is copied to xmm1[127:32]. + + + ModRM:reg(w) + EVEX.vvvv + ModRM:r/m(r) + NA + + + + VRSQRT28PD--Approximation to the Reciprocal Square Root of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error. + + VRSQRT28PD + zmm1 {k1}{z},zmm2/m512/m64bcst {sae} + EVEX.512.66.0F38.W1 CC /r + + AVX512ER + + Computes approximations to the Reciprocal square root (<2^28 relative error) of the packed double-precision floating-point values from zmm2/m512/m64bcst and stores result in zmm1with writemask k1. 
+ + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRSQRT28SD--Approximation to the Reciprocal Square Root of Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error. + + VRSQRT28SD + xmm1 {k1}{z},xmm2,xmm3/m64 {sae} + EVEX.NDS.LIG.66.0F38.W1 CD /r + + AVX512ER + + Computes approximate reciprocal square root (<2^-28 relative error) of the scalar double-precision floating-point value from xmm3/m64 and stores result in xmm1with writemask k1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VRSQRT28PS--Approximation to the Reciprocal Square Root of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error. + + VRSQRT28PS + zmm1 {k1}{z},zmm2/m512/m32bcst {sae} + EVEX.512.66.0F38.W0 CC /r + + AVX512ER + + Computes approximations to the Reciprocal square root (<2^-28 relative error) of the packed single-precision floating-point values from zmm2/m512/m32bcst and stores result in zmm1with writemask k1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VRSQRT28SS--Approximation to the Reciprocal Square Root of Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error. + + VRSQRT28SS + xmm1 {k1}{z},xmm2,xmm3/m32 {sae} + EVEX.NDS.LIG.66.0F38.W0 CD /r + + AVX512ER + + Computes approximate reciprocal square root (<2^-28 relative error) of the scalar single-precision floating-point value from xmm3/m32 and stores result in xmm1with writemask k1. Also, upper 3 single-precision floating-point value (bits[127:32]) from xmm2 is copied to xmm1[127:32]. + + + ModRM:reg(w) + EVEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VGATHERPF0DPS/VGATHERPF0QPS/VGATHERPF0DPD/VGATHERPF0QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T0 Hint. + + VGATHERPF0DPS + vm32z {k1} + EVEX.512.66.0F38.W0 C6 /1 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T0 hint. + + + VGATHERPF0QPS + vm64z {k1} + EVEX.512.66.0F38.W0 C7 /1 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T0 hint. + + + VGATHERPF0DPD + vm32y {k1} + EVEX.512.66.0F38.W1 C6 /1 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T0 hint. + + + VGATHERPF0QPD + vm64z {k1} + EVEX.512.66.0F38.W1 C7 /1 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T0 hint. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + NA + + + + VGATHERPF1DPS/VGATHERPF1QPS/VGATHERPF1DPD/VGATHERPF1QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T1 Hint. + + VGATHERPF1DPS + vm32z {k1} + EVEX.512.66.0F38.W0 C6 /2 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T1 hint. + + + VGATHERPF1QPS + vm64z {k1} + EVEX.512.66.0F38.W0 C7 /2 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T1 hint. 
+ + + VGATHERPF1DPD + vm32y {k1} + EVEX.512.66.0F38.W1 C6 /2 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T1 hint. + + + VGATHERPF1QPD + vm64z {k1} + EVEX.512.66.0F38.W1 C7 /2 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T1 hint. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + NA + + + + VSCATTERPF0DPS/VSCATTERPF0QPS/VSCATTERPF0DPD/VSCATTERPF0QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T0 Hint with Intent to Write. + + VSCATTERPF0DPS + vm32z {k1} + EVEX.512.66.0F38.W0 C6 /5 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T0 hint with intent to write. + + + VSCATTERPF0QPS + vm64z {k1} + EVEX.512.66.0F38.W0 C7 /5 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T0 hint with intent to write. + + + VSCATTERPF0DPD + vm32y {k1} + EVEX.512.66.0F38.W1 C6 /5 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T0 hint with intent to write. + + + VSCATTERPF0QPD + vm64z {k1} + EVEX.512.66.0F38.W1 C7 /5 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T0 hint with intent to write. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + NA + + + + VSCATTERPF1DPS/VSCATTERPF1QPS/VSCATTERPF1DPD/VSCATTERPF1QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T1 Hint with Intent to Write. + + VSCATTERPF1DPS + vm32z {k1} + EVEX.512.66.0F38.W0 C6 /6 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T1 hint with intent to write. + + + VSCATTERPF1QPS + vm64z {k1} + EVEX.512.66.0F38.W0 C7 /6 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T1 hint with intent to write. + + + VSCATTERPF1DPD + vm32y {k1} + EVEX.512.66.0F38.W1 C6 /6 /vsib + + AVX512PF + + Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T1 hint with intent to write. + + + VSCATTERPF1QPD + vm64z {k1} + EVEX.512.66.0F38.W1 C7 /6 /vsib + + AVX512PF + + Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T1 hint with intent to write. + + + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + NA + NA + NA + + + + SHA1RNDS4--Perform Four Rounds of SHA1 Operation. + + SHA1RNDS4 + xmm1,xmm2/m128,imm8 + 0F 3A CC /r ib + + SHA + + Performs four rounds of SHA1 operation operating on SHA1 state (A,B,C,D) from xmm1, with a pre-computed sum of the next 4 round message dwords and state variable E from xmm2/m128. The immediate byte controls logic functions and round constants. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Imm8 + NA + + + + SHA1NEXTE--Calculate SHA1 State Variable E after Four Rounds. + + SHA1NEXTE + xmm1,xmm2/m128 + 0F 38 C8 /r + + SHA + + Calculates SHA1 state variable E after four rounds of operation from the current SHA1 state variable A in xmm1. 
The calculated value of the SHA1 state variable E is added to the scheduled dwords in xmm2/m128, and stored with some of the scheduled dwords in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + SHA1MSG1--Perform an Intermediate Calculation for the Next Four SHA1 Message Dwords. + + SHA1MSG1 + xmm1,xmm2/m128 + 0F 38 C9 /r + + SHA + + Performs an intermediate calculation for the next four SHA1 message dwords using previous message dwords from xmm1 and xmm2/m128, storing the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + SHA1MSG2--Perform a Final Calculation for the Next Four SHA1 Message Dwords. + + SHA1MSG2 + xmm1,xmm2/m128 + 0F 38 CA /r + + SHA + + Performs the final calculation for the next four SHA1 message dwords using intermediate results from xmm1 and the previous message dwords from xmm2/m128, storing the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + SHA256RNDS2--Perform Two Rounds of SHA256 Operation. + + SHA256RNDS2 + xmm1,xmm2/m128,<XMM0> + 0F 38 CB /r + + SHA + + Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from xmm1, an initial SHA256 state (A,B,E,F) from xmm2/m128, and a pre-computed sum of the next 2 round message dwords and the corresponding round constants from the implicit operand XMM0, storing the updated SHA256 state (A,B,E,F) result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + Implicit XMM0(r) + NA + + + + SHA256MSG1--Perform an Intermediate Calculation for the Next Four SHA256 Message Dwords. + + SHA256MSG1 + xmm1,xmm2/m128 + 0F 38 CC /r + + SHA + + Performs an intermediate calculation for the next four SHA256 message dwords using previous message dwords from xmm1 and xmm2/m128, storing the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + SHA256MSG2--Perform a Final Calculation for the Next Four SHA256 Message Dwords. + + SHA256MSG2 + xmm1,xmm2/m128 + 0F 38 CD /r + + SHA + + Performs the final calculation for the next four SHA256 message dwords using previous message dwords from xmm1 and xmm2/m128, storing the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + PREFETCHWT1--Prefetch Vector Data Into Caches with Intent to Write and T1 Hint. + + PREFETCHWT1 + m8 + 0F 0D /2 + + PREFETCHWT1 + + Move data from m8 closer to the processor using T1 hint with intent to write. + + + ModRM:r/m(r) + NA + NA + NA + + + + CLWB--Cache Line Write Back (THIS IS AN EXAMPLE). + + CLWB + m8 + 66 0F AE /6 + + CLWB + + Writes back modified cache line containing m8, and may retain the line in cache hierarchy in non-modified state. + + + ModRM:r/m(w) + NA + NA + NA + + + NA + NA + NA + NA + + + + CLWB--Cache Line Write Back. + + CLWB + m8 + 66 0F AE /6 + + CLWB + + Writes back modified cache line containing m8, and may retain the line in cache hierarchy in non-modified state. + + + ModRM:r/m(w) + NA + NA + NA + + + + PCOMMIT--Persistent Commit. + + PCOMMIT + void + 66 0F AE F8 + + PCOMMIT + + Commits stores to persistent memory. + + + NA + NA + NA + NA + + + \ No newline at end of file diff --git a/xml/raw/x86/Intel/AZ.xml b/xml/raw/x86/Intel/AZ.xml new file mode 100644 index 0000000..bc26aa0 --- /dev/null +++ b/xml/raw/x86/Intel/AZ.xml @@ -0,0 +1,22780 @@ + + + + + + + + + AAA--ASCII Adjust After Addition. + + AAA + void + 37 + ASCII adjust AL after addition. + + + NA + NA + NA + NA + + + + AAD--ASCII Adjust AX Before Division. + + AAD + void + D5 0A + ASCII adjust AX before division. + + + AAD + imm8 + D5 ib + Adjust AX before division to number base imm8. 
+ + + NA + NA + NA + NA + + + + AAM--ASCII Adjust AX After Multiply. + + AAM + void + D4 0A + ASCII adjust AX after multiply. + + + AAM + imm8 + D4 ib + Adjust AX after multiply to number base imm8. + + + NA + NA + NA + NA + + + + AAS--ASCII Adjust AL After Subtraction. + + AAS + void + 3F + ASCII adjust AL after subtraction. + + + NA + NA + NA + NA + + + + ADC--Add with Carry. + + ADC + AL,imm8 + 14 ib + Add with carry imm8 to AL. + + + ADC + AX,imm16 + 15 iw + Add with carry imm16 to AX. + + + ADC + EAX,imm32 + 15 id + Add with carry imm32 to EAX. + + + ADC + RAX,imm32 + REX.W + 15 id + Add with carry imm32 sign extended to 64bits to RAX. + + + ADC + r/m8,imm8* + 80 /2 ib + Add with carry imm8 to r/m8. + + + ADC + r/m8,imm8 + REX + 80 /2 ib + Add with carry imm8 to r/m8. + + + ADC + r/m16,imm16 + 81 /2 iw + Add with carry imm16 to r/m16. + + + ADC + r/m32,imm32 + 81 /2 id + Add with CF imm32 to r/m32. + + + ADC + r/m64,imm32 + REX.W + 81 /2 id + Add with CF imm32 sign extended to 64-bits to r/m64. + + + ADC + r/m16,imm8 + 83 /2 ib + Add with CF sign-extended imm8 to r/m16. + + + ADC + r/m32,imm8 + 83 /2 ib + Add with CF sign-extended imm8 into r/m32. + + + ADC + r/m64,imm8 + REX.W + 83 /2 ib + Add with CF sign-extended imm8 into r/m64. + + + ADC + r/m8,r8** + 10 /r + Add with carry byte register to r/m8. + + + ADC + r/m8,r8 + REX + 10 /r + Add with carry byte register to r/m64. + + + ADC + r/m16,r16 + 11 /r + Add with carry r16 to r/m16. + + + ADC + r/m32,r32 + 11 /r + Add with CF r32 to r/m32. + + + ADC + r/m64,r64 + REX.W + 11 /r + Add with CF r64 to r/m64. + + + ADC + r8,r/m8** + 12 /r + Add with carry r/m8 to byte register. + + + ADC + r8,r/m8 + REX + 12 /r + Add with carry r/m64 to byte register. + + + ADC + r16,r/m16 + 13 /r + Add with carry r/m16 to r16. + + + ADC + r32,r/m32 + 13 /r + Add with CF r/m32 to r32. + + + ADC + r64,r/m64 + REX.W + 13 /r + Add with CF r/m64 to r64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + AL/AX/EAX/RAX + imm8(r) + NA + NA + + + + ADCX--Unsigned Integer Addition of Two Operands with Carry Flag. + + ADCX + r32,r/m32 + 66 0F 38 F6 /r + + ADX + + Unsigned addition of r32 with CF, r/m32 to r32, writes CF. + + + ADCX + r64,r/m64 + 66 REX.w 0F 38 F6 /r + + ADX + + Unsigned addition of r64 with CF, r/m64 to r64, writes CF. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + ADD--Add. + + ADD + AL,imm8 + 04 ib + Add imm8 to AL. + + + ADD + AX,imm16 + 05 iw + Add imm16 to AX. + + + ADD + EAX,imm32 + 05 id + Add imm32 to EAX. + + + ADD + RAX,imm32 + REX.W + 05 id + Add imm32 sign-extended to 64-bits to RAX. + + + ADD + r/m8,imm8* + 80 /0 ib + Add imm8 to r/m8. + + + ADD + r/m8,imm8 + REX + 80 /0 ib + Add sign-extended imm8 to r/m64. + + + ADD + r/m16,imm16 + 81 /0 iw + Add imm16 to r/m16. + + + ADD + r/m32,imm32 + 81 /0 id + Add imm32 to r/m32. + + + ADD + r/m64,imm32 + REX.W + 81 /0 id + Add imm32 sign-extended to 64-bits to r/m64. + + + ADD + r/m16,imm8 + 83 /0 ib + Add sign-extended imm8 to r/m16. + + + ADD + r/m32,imm8 + 83 /0 ib + Add sign-extended imm8 to r/m32. + + + ADD + r/m64,imm8 + REX.W + 83 /0 ib + Add sign-extended imm8 to r/m64. + + + ADD + r/m8,r8** + 00 /r + Add r8 to r/m8. + + + ADD + r/m8,r8 + REX + 00 /r + Add r8 to r/m8. + + + ADD + r/m16,r16 + 01 /r + Add r16 to r/m16. + + + ADD + r/m32,r32 + 01 /r + Add r32 to r/m32. + + + ADD + r/m64,r64 + REX.W + 01 /r + Add r64 to r/m64. + + + ADD + r8,r/m8** + 02 /r + Add r/m8 to r8. 
+ + + ADD + r8,r/m8 + REX + 02 /r + Add r/m8 to r8. + + + ADD + r16,r/m16 + 03 /r + Add r/m16 to r16. + + + ADD + r32,r/m32 + 03 /r + Add r/m32 to r32. + + + ADD + r64,r/m64 + REX.W + 03 /r + Add r/m64 to r64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + AL/AX/EAX/RAX + imm8(r) + NA + NA + + + + ADDPD--Add Packed Double-Precision Floating-Point Values. + + ADDPD + xmm1,xmm2/m128 + 66 0F 58 /r + + SSE2 + + Add packed double-precision floating-point values from xmm2/m128 to xmm1. + + + VADDPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 58 /r + + AVX + + Add packed double-precision floating-point values from xmm3/mem to xmm2 and stores result in xmm1. + + + VADDPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 58 /r + + AVX + + Add packed double-precision floating-point values from ymm3/mem to ymm2 and stores result in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ADDPS--Add Packed Single-Precision Floating-Point Values. + + ADDPS + xmm1,xmm2/m128 + 0F 58 /r + + SSE + + Add packed single-precision floating-point values from xmm2/m128 to xmm1 and stores result in xmm1. + + + VADDPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 58 /r + + AVX + + Add packed single-precision floating-point values from xmm3/mem to xmm2 and stores result in xmm1. + + + VADDPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 58 /r + + AVX + + Add packed single-precision floating-point values from ymm3/mem to ymm2 and stores result in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ADDSD--Add Scalar Double-Precision Floating-Point Values. + + ADDSD + xmm1,xmm2/m64 + F2 0F 58 /r + + SSE2 + + Add the low double-precision floating-point value from xmm2/m64 to xmm1. + + + VADDSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.LIG.F2.0F.WIG 58 /r + + AVX + + Add the low double-precision floating-point value from xmm3/mem to xmm2 and store the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ADDSS--Add Scalar Single-Precision Floating-Point Values. + + ADDSS + xmm1,xmm2/m32 + F3 0F 58 /r + + SSE + + Add the low single-precision floating-point value from xmm2/m32 to xmm1. + + + VADDSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.LIG.F3.0F.WIG 58 /r + + AVX + + Add the low single-precision floating-point value from xmm3/mem to xmm2 and store the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ADDSUBPD--Packed Double-FP Add/Subtract. + + ADDSUBPD + xmm1,xmm2/m128 + 66 0F D0 /r + + SSE3 + + Add/subtract double-precision floating-point values from xmm2/m128 to xmm1. + + + VADDSUBPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D0 /r + + AVX + + Add/subtract packed double-precision floating-point values from xmm3/mem to xmm2 and stores result in xmm1. + + + VADDSUBPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG D0 /r + + AVX + + Add / subtract packed double-precision floating-point values from ymm3/mem to ymm2 and stores result in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ADDSUBPS--Packed Single-FP Add/Subtract. + + ADDSUBPS + xmm1,xmm2/m128 + F2 0F D0 /r + + SSE3 + + Add/subtract single-precision floating-point values from xmm2/m128 to xmm1. 
+ + + VADDSUBPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.F2.0F.WIG D0 /r + + AVX + + Add/subtract single-precision floating-point values from xmm3/mem to xmm2 and stores result in xmm1. + + + VADDSUBPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.F2.0F.WIG D0 /r + + AVX + + Add / subtract single-precision floating-point values from ymm3/mem to ymm2 and stores result in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ADOX--Unsigned Integer Addition of Two Operands with Overflow Flag. + + ADOX + r32,r/m32 + F3 0F 38 F6 /r + + ADX + + Unsigned addition of r32 with OF, r/m32 to r32, writes OF. + + + ADOX + r64,r/m64 + F3 REX.w 0F 38 F6 /r + + ADX + + Unsigned addition of r64 with OF, r/m64 to r64, writes OF. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + AESDEC--Perform One Round of an AES Decryption Flow. + + AESDEC + xmm1,xmm2/m128 + 66 0F 38 DE /r + + AES + + Perform one round of an AES decryption flow, using the Equivalent Inverse Cipher, operating on a 128-bit data (state) from xmm1 with a 128-bit round key from xmm2/m128. + + + VAESDEC + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG DE /r + + AES + AVX + + Perform one round of an AES decryption flow, using the Equivalent Inverse Cipher, operating on a 128-bit data (state) from xmm2 with a 128-bit round key from xmm3/m128; store the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + AESDECLAST--Perform Last Round of an AES Decryption Flow. + + AESDECLAST + xmm1,xmm2/m128 + 66 0F 38 DF /r + + AES + + Perform the last round of an AES decryption flow, using the Equivalent Inverse Cipher, operating on a 128-bit data (state) from xmm1 with a 128-bit round key from xmm2/m128. + + + VAESDECLAST + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG DF /r + + AES + AVX + + Perform the last round of an AES decryption flow, using the Equivalent Inverse Cipher, operating on a 128-bit data (state) from xmm2 with a 128-bit round key from xmm3/m128; store the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + AESENC--Perform One Round of an AES Encryption Flow. + + AESENC + xmm1,xmm2/m128 + 66 0F 38 DC /r + + AES + + Perform one round of an AES encryption flow, operating on a 128-bit data (state) from xmm1 with a 128-bit round key from xmm2/m128. + + + VAESENC + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG DC /r + + AES + AVX + + Perform one round of an AES encryption flow, operating on a 128-bit data (state) from xmm2 with a 128-bit round key from the xmm3/m128; store the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + AESENCLAST--Perform Last Round of an AES Encryption Flow. + + AESENCLAST + xmm1,xmm2/m128 + 66 0F 38 DD /r + + AES + + Perform the last round of an AES encryption flow, operating on a 128-bit data (state) from xmm1 with a 128-bit round key from xmm2/m128. + + + VAESENCLAST + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG DD /r + + AES + AVX + + Perform the last round of an AES encryption flow, operating on a 128-bit data (state) from xmm2 with a 128 bit round key from xmm3/m128; store the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + AESIMC--Perform the AES InvMixColumn Transformation. 
+ + AESIMC + xmm1,xmm2/m128 + 66 0F 38 DB /r + + AES + + Perform the InvMixColumn transformation on a 128-bit round key from xmm2/m128 and store the result in xmm1. + + + VAESIMC + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG DB /r + + AES + AVX + + Perform the InvMixColumn transformation on a 128-bit round key from xmm2/m128 and store the result in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + AESKEYGENASSIST--AES Round Key Generation Assist. + + AESKEYGENASSIST + xmm1,xmm2/m128,imm8 + 66 0F 3A DF /r ib + + AES + + Assist in AES round key generation using an 8 bits Round Constant (RCON) specified in the immediate byte, operating on 128 bits of data specified in xmm2/m128 and stores the result in xmm1. + + + VAESKEYGENASSIST + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.WIG DF /r ib + + AES + AVX + + Assist in AES round key generation using 8 bits Round Constant (RCON) specified in the immediate byte, operating on 128 bits of data specified in xmm2/m128 and stores the result in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + + AND--Logical AND. + + AND + AL,imm8 + 24 ib + AL AND imm8. + + + AND + AX,imm16 + 25 iw + AX AND imm16. + + + AND + EAX,imm32 + 25 id + EAX AND imm32. + + + AND + RAX,imm32 + REX.W + 25 id + RAX AND imm32 sign-extended to 64-bits. + + + AND + r/m8,imm8* + 80 /4 ib + r/m8 AND imm8. + + + AND + r/m8,imm8 + REX + 80 /4 ib + r/m8 AND imm8. + + + AND + r/m16,imm16 + 81 /4 iw + r/m16 AND imm16. + + + AND + r/m32,imm32 + 81 /4 id + r/m32 AND imm32. + + + AND + r/m64,imm32 + REX.W + 81 /4 id + r/m64 AND imm32 sign extended to 64-bits. + + + AND + r/m16,imm8 + 83 /4 ib + r/m16 AND imm8 (sign-extended). + + + AND + r/m32,imm8 + 83 /4 ib + r/m32 AND imm8 (sign-extended). + + + AND + r/m64,imm8 + REX.W + 83 /4 ib + r/m64 AND imm8 (sign-extended). + + + AND + r/m8,r8** + 20 /r + r/m8 AND r8. + + + AND + r/m8,r8 + REX + 20 /r + r/m64 AND r8 (sign-extended). + + + AND + r/m16,r16 + 21 /r + r/m16 AND r16. + + + AND + r/m32,r32 + 21 /r + r/m32 AND r32. + + + AND + r/m64,r64 + REX.W + 21 /r + r/m64 AND r32. + + + AND + r8,r/m8** + 22 /r + r8 AND r/m8. + + + AND + r8,r/m8 + REX + 22 /r + r/m64 AND r8 (sign-extended). + + + AND + r16,r/m16 + 23 /r + r16 AND r/m16. + + + AND + r32,r/m32 + 23 /r + r32 AND r/m32. + + + AND + r64,r/m64 + REX.W + 23 /r + r64 AND r/m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + AL/AX/EAX/RAX + imm8(r) + NA + NA + + + + ANDN--Logical AND NOT. + + ANDN + r32a,r32b,r/m32 + VEX.NDS.LZ.0F38.W0 F2 /r + + BMI1 + + Bitwise AND of inverted r32b with r/m32, store result in r32a. + + + ANDN + r64a,r64b,r/m64 + VEX.NDS.LZ. 0F38.W1 F2 /r + + BMI1 + + Bitwise AND of inverted r64b with r/m64, store result in r64a. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ANDPD--Bitwise Logical AND of Packed Double-Precision Floating-Point Values. + + ANDPD + xmm1,xmm2/m128 + 66 0F 54 /r + + SSE2 + + Return the bitwise logical AND of packed double-precision floating-point values in xmm1 and xmm2/m128. + + + VANDPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 54 /r + + AVX + + Return the bitwise logical AND of packed double-precision floating-point values in xmm2 and xmm3/mem. + + + VANDPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 54 /r + + AVX + + Return the bitwise logical AND of packed double-precision floating-point values in ymm2 and ymm3/mem. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ANDPS--Bitwise Logical AND of Packed Single-Precision Floating-Point Values. + + ANDPS + xmm1,xmm2/m128 + 0F 54 /r + + SSE + + Bitwise logical AND of xmm2/m128 and xmm1. + + + VANDPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 54 /r + + AVX + + Return the bitwise logical AND of packed single-precision floating-point values in xmm2 and xmm3/mem. + + + VANDPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 54 /r + + AVX + + Return the bitwise logical AND of packed single-precision floating-point values in ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ANDNPD--Bitwise Logical AND NOT of Packed Double-Precision Floating-Point Values. + + ANDNPD + xmm1,xmm2/m128 + 66 0F 55 /r + + SSE2 + + Bitwise logical AND NOT of xmm2/m128 and xmm1. + + + VANDNPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 55 /r + + AVX + + Return the bitwise logical AND NOT of packed double-precision floating-point values in xmm2 and xmm3/mem. + + + VANDNPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 55/r + + AVX + + Return the bitwise logical AND NOT of packed double-precision floating-point values in ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ANDNPS--Bitwise Logical AND NOT of Packed Single-Precision Floating-Point Values. + + ANDNPS + xmm1,xmm2/m128 + 0F 55 /r + + SSE + + Bitwise logical AND NOT of xmm2/m128 and xmm1. + + + VANDNPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 55 /r + + AVX + + Return the bitwise logical AND NOT of packed single-precision floating-point values in xmm2 and xmm3/mem. + + + VANDNPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 55 /r + + AVX + + Return the bitwise logical AND NOT of packed single-precision floating-point values in ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ARPL--Adjust RPL Field of Segment Selector. + + ARPL + r/m16,r16 + 63 /r + Adjust RPL of r/m16 to not less than RPL of r16. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + BLENDPD--Blend Packed Double Precision Floating-Point Values. + + BLENDPD + xmm1,xmm2/m128,imm8 + 66 0F 3A 0D /r ib + + SSE4_1 + + Select packed DP-FP values from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1. + + + VBLENDPD + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A.WIG 0D /r ib + + AVX + + Select packed double-precision floating-point Values from xmm2 and xmm3/m128 from mask in imm8 and store the values in xmm1. + + + VBLENDPD + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A.WIG 0D /r ib + + AVX + + Select packed double-precision floating-point Values from ymm2 and ymm3/m256 from mask in imm8 and store the values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r)[3:0] + + + + BEXTR--Bit Field Extract. + + BEXTR + r32a,r/m32,r32b + VEX.NDS.LZ.0F38.W0 F7 /r + + BMI1 + + Contiguous bitwise extract from r/m32 using r32b as control; store result in r32a. + + + BEXTR + r64a,r/m64,r64b + VEX.NDS.LZ.0F38.W1 F7 /r + + BMI1 + + Contiguous bitwise extract from r/m64 using r64b as control; store result in r64a. + + + ModRM:reg(w) + ModRM:r/m(r) + VEX.vvvv(r) + NA + + + + BLENDPS--Blend Packed Single Precision Floating-Point Values. 
+ + BLENDPS + xmm1,xmm2/m128,imm8 + 66 0F 3A 0C /r ib + + SSE4_1 + + Select packed single precision floating-point values from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1. + + + VBLENDPS + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A.WIG 0C /r ib + + AVX + + Select packed single-precision floating-point values from xmm2 and xmm3/m128 from mask in imm8 and store the values in xmm1. + + + VBLENDPS + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A.WIG 0C /r ib + + AVX + + Select packed single-precision floating-point values from ymm2 and ymm3/m256 from mask in imm8 and store the values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + BLENDVPD--Variable Blend Packed Double Precision Floating-Point Values. + + BLENDVPD + xmm1,xmm2/m128,<XMM0> + 66 0F 38 15 /r + + SSE4_1 + + Select packed DP FP values from xmm1 and xmm2 from mask specified in XMM0 and store the values in xmm1. + + + VBLENDVPD + xmm1,xmm2,xmm3/m128,xmm4 + VEX.NDS.128.66.0F3A.W0 4B /r /is4 + + AVX + + Conditionally copy double-precision floatingpoint values from xmm2 or xmm3/m128 to xmm1, based on mask bits in the mask operand, xmm4. + + + VBLENDVPD + ymm1,ymm2,ymm3/m256,ymm4 + VEX.NDS.256.66.0F3A.W0 4B /r /is4 + + AVX + + Conditionally copy double-precision floatingpoint values from ymm2 or ymm3/m256 to ymm1, based on mask bits in the mask operand, ymm4. + + + ModRM:reg(r,w) + ModRM:r/m(r) + implicit XMM0 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r)[7:4] + + + + BLENDVPS--Variable Blend Packed Single Precision Floating-Point Values. + + BLENDVPS + xmm1,xmm2/m128,<XMM0> + 66 0F 38 14 /r + + SSE4_1 + + Select packed single precision floating-point values from xmm1 and xmm2/m128 from mask specified in XMM0 and store the values into xmm1. + + + VBLENDVPS + xmm1,xmm2,xmm3/m128,xmm4 + VEX.NDS.128.66.0F3A.W0 4A /r /is4 + + AVX + + Conditionally copy single-precision floatingpoint values from xmm2 or xmm3/m128 to xmm1, based on mask bits in the specified mask operand, xmm4. + + + VBLENDVPS + ymm1,ymm2,ymm3/m256,ymm4 + VEX.NDS.256.66.0F3A.W0 4A /r /is4 + + AVX + + Conditionally copy single-precision floatingpoint values from ymm2 or ymm3/m256 to ymm1, based on mask bits in the specified mask register, ymm4. + + + ModRM:reg(r,w) + ModRM:r/m(r) + implicit XMM0 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r)[7:4] + + + + BLSI--Extract Lowest Set Isolated Bit. + + BLSI + r32,r/m32 + VEX.NDD.LZ.0F38.W0 F3 /3 + + BMI1 + + Extract lowest set bit from r/m32 and set that bit in r32. + + + BLSI + r64,r/m64 + VEX.NDD.LZ.0F38.W1 F3 /3 + + BMI1 + + Extract lowest set bit from r/m64, and set that bit in r64. + + + VEX.vvvv(w) + ModRM:r/m(r) + NA + NA + + + + BLSMSK--Get Mask Up to Lowest Set Bit. + + BLSMSK + r32,r/m32 + VEX.NDD.LZ.0F38.W0 F3 /2 + + BMI1 + + Set all lower bits in r32 to '1' starting from bit 0 to lowest set bit in r/m32. + + + BLSMSK + r64,r/m64 + VEX.NDD.LZ.0F38.W1 F3 /2 + + BMI1 + + Set all lower bits in r64 to '1' starting from bit 0 to lowest set bit in r/m64. + + + VEX.vvvv(w) + ModRM:r/m(r) + NA + NA + + + + BLSR--Reset Lowest Set Bit. + + BLSR + r32,r/m32 + VEX.NDD.LZ.0F38.W0 F3 /1 + + BMI1 + + Reset lowest set bit of r/m32, keep all other bits of r/m32 and write result to r32. + + + BLSR + r64,r/m64 + VEX.NDD.LZ.0F38.W1 F3 /1 + + BMI1 + + Reset lowest set bit of r/m64, keep all other bits of r/m64 and write result to r64. 
+ + + VEX.vvvv(w) + ModRM:r/m(r) + NA + NA + + + + BNDCL--Check Lower Bound. + + BNDCL + bnd,r/m32 + F3 0F 1A /r + + MPX + + Generate a #BR if the address in r/m32 is lower than the lower bound in bnd.LB. + + + BNDCL + bnd,r/m64 + F3 0F 1A /r + + MPX + + Generate a #BR if the address in r/m64 is lower than the lower bound in bnd.LB. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + BNDCU/BNDCN--Check Upper Bound. + + BNDCU + bnd,r/m32 + F2 0F 1A /r + + MPX + + Generate a #BR if the address in r/m32 is higher than the upper bound in bnd.UB (bnb.UB in 1's complement form). + + + BNDCU + bnd,r/m64 + F2 0F 1A /r + + MPX + + Generate a #BR if the address in r/m64 is higher than the upper bound in bnd.UB (bnb.UB in 1's complement form). + + + BNDCN + bnd,r/m32 + F2 0F 1B /r + + MPX + + Generate a #BR if the address in r/m32 is higher than the upper bound in bnd.UB (bnb.UB not in 1's complement form). + + + BNDCN + bnd,r/m64 + F2 0F 1B /r + + MPX + + Generate a #BR if the address in r/m64 is higher than the upper bound in bnd.UB (bnb.UB not in 1's complement form). + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + BNDLDX--Load Extended Bounds Using Address Translation. + + BNDLDX + bnd,mib + 0F 1A /r + + MPX + + Load the bounds stored in a bound table entry (BTE) into bnd with address translation using the base of mib and conditional on the index of mib matching the pointer value in the BTE. + + + ModRM:reg(w) + SIB.base(r): Address of pointer,SIB.index(r) + NA + NA + + + + BNDMK--Make Bounds. + + BNDMK + bnd,m32 + F3 0F 1B /r + + MPX + + Make lower and upper bounds from m32 and store them in bnd. + + + BNDMK + bnd,m64 + F3 0F 1B /r + + MPX + + Make lower and upper bounds from m64 and store them in bnd. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + BNDMOV--Move Bounds. + + BNDMOV + bnd1,bnd2/m64 + 66 0F 1A /r + + MPX + + Move lower and upper bound from bnd2/m64 to bound register bnd1. + + + BNDMOV + bnd1,bnd2/m128 + 66 0F 1A /r + + MPX + + Move lower and upper bound from bnd2/m128 to bound register bnd1. + + + BNDMOV + bnd1/m64,bnd2 + 66 0F 1B /r + + MPX + + Move lower and upper bound from bnd2 to bnd1/m64. + + + BNDMOV + bnd1/m128,bnd2 + 66 0F 1B /r + + MPX + + Move lower and upper bound from bnd2 to bound register bnd1/m128. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + BNDSTX--Store Extended Bounds Using Address Translation. + + BNDSTX + mib,bnd + 0F 1B /r + + MPX + + Store the bounds in bnd and the pointer value in the index register of mib to a bound table entry (BTE) with address translation using the base of mib. + + + SIB.base(r): Address of pointer,SIB.index(r) + ModRM:reg(r) + NA + NA + + + + BOUND--Check Array Index Against Bounds. + + BOUND + r16,m16&16 + 62 /r + Check if r16 (array index) is within bounds specified by m16&16. + + + BOUND + r32,m32&32 + 62 /r + Check if r32 (array index) is within bounds specified by m32&32. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + + BSF--Bit Scan Forward. + + BSF + r16,r/m16 + 0F BC /r + Bit scan forward on r/m16. + + + BSF + r32,r/m32 + 0F BC /r + Bit scan forward on r/m32. + + + BSF + r64,r/m64 + REX.W + 0F BC /r + Bit scan forward on r/m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + BSR--Bit Scan Reverse. + + BSR + r16,r/m16 + 0F BD /r + Bit scan reverse on r/m16. + + + BSR + r32,r/m32 + 0F BD /r + Bit scan reverse on r/m32. + + + BSR + r64,r/m64 + REX.W + 0F BD /r + Bit scan reverse on r/m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + BSWAP--Byte Swap. 
+ + BSWAP + r32 + 0F C8+rd + Reverses the byte order of a 32-bit register. + + + BSWAP + r64 + REX.W + 0F C8+rd + Reverses the byte order of a 64-bit register. + + + opcode + rd(r,w) + NA + NA + NA + + + + BT--Bit Test. + + BT + r/m16,r16 + 0F A3 /r + Store selected bit in CF flag. + + + BT + r/m32,r32 + 0F A3 /r + Store selected bit in CF flag. + + + BT + r/m64,r64 + REX.W + 0F A3 /r + Store selected bit in CF flag. + + + BT + r/m16,imm8 + 0F BA /4 ib + Store selected bit in CF flag. + + + BT + r/m32,imm8 + 0F BA /4 ib + Store selected bit in CF flag. + + + BT + r/m64,imm8 + REX.W + 0F BA /4 ib + Store selected bit in CF flag. + + + ModRM:r/m(r) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(r) + imm8(r) + NA + NA + + + + BTC--Bit Test and Complement. + + BTC + r/m16,r16 + 0F BB /r + Store selected bit in CF flag and complement. + + + BTC + r/m32,r32 + 0F BB /r + Store selected bit in CF flag and complement. + + + BTC + r/m64,r64 + REX.W + 0F BB /r + Store selected bit in CF flag and complement. + + + BTC + r/m16,imm8 + 0F BA /7 ib + Store selected bit in CF flag and complement. + + + BTC + r/m32,imm8 + 0F BA /7 ib + Store selected bit in CF flag and complement. + + + BTC + r/m64,imm8 + REX.W + 0F BA /7 ib + Store selected bit in CF flag and complement. + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + + BTR--Bit Test and Reset. + + BTR + r/m16,r16 + 0F B3 /r + Store selected bit in CF flag and clear. + + + BTR + r/m32,r32 + 0F B3 /r + Store selected bit in CF flag and clear. + + + BTR + r/m64,r64 + REX.W + 0F B3 /r + Store selected bit in CF flag and clear. + + + BTR + r/m16,imm8 + 0F BA /6 ib + Store selected bit in CF flag and clear. + + + BTR + r/m32,imm8 + 0F BA /6 ib + Store selected bit in CF flag and clear. + + + BTR + r/m64,imm8 + REX.W + 0F BA /6 ib + Store selected bit in CF flag and clear. + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + + BTS--Bit Test and Set. + + BTS + r/m16,r16 + 0F AB /r + Store selected bit in CF flag and set. + + + BTS + r/m32,r32 + 0F AB /r + Store selected bit in CF flag and set. + + + BTS + r/m64,r64 + REX.W + 0F AB /r + Store selected bit in CF flag and set. + + + BTS + r/m16,imm8 + 0F BA /5 ib + Store selected bit in CF flag and set. + + + BTS + r/m32,imm8 + 0F BA /5 ib + Store selected bit in CF flag and set. + + + BTS + r/m64,imm8 + REX.W + 0F BA /5 ib + Store selected bit in CF flag and set. + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + + BZHI--Zero High Bits Starting with Specified Bit Position. + + BZHI + r32a,r/m32,r32b + VEX.NDS.LZ.0F38.W0 F5 /r + + BMI2 + + Zero bits in r/m32 starting with the position in r32b, write result to r32a. + + + BZHI + r64a,r/m64,r64b + VEX.NDS.LZ.0F38.W1 F5 /r + + BMI2 + + Zero bits in r/m64 starting with the position in r64b, write result to r64a. + + + ModRM:reg(w) + ModRM:r/m(r) + VEX.vvvv(r) + NA + + + + CALL--Call Procedure. + + CALL + rel16 + E8 cw + Call near, relative, displacement relative to next instruction. + + + CALL + rel32 + E8 cd + Call near, relative, displacement relative to next instruction. 32-bit displacement sign extended to 64-bits in 64-bit mode. + + + CALL + r/m16 + FF /2 + Call near, absolute indirect, address given in r/m16. + + + CALL + r/m32 + FF /2 + Call near, absolute indirect, address given in r/m32. + + + CALL + r/m64 + FF /2 + Call near, absolute indirect, address given in r/m64. 
+ + + CALL + ptr16:16 + 9A cd + Call far, absolute, address given in operand. + + + CALL + ptr16:32 + 9A cp + Call far, absolute, address given in operand. + + + CALL + m16:16 + FF /3 + Call far, absolute indirect address given in m16:16. In 32-bit mode: if selector points to a gate, then RIP = 32-bit zero extended displacement taken from gate; else RIP = zero extended 16instruction. + + + CALL + m16:32 + FF /3 + In 64-bit mode: If selector points to a gate, then RIP = 64-bit displacement taken from gate; else RIP = zero extended 32-bit offset from far pointer referenced in the instruction. + + + CALL + m16:64 + REX.W + FF /3 + In 64-bit mode: If selector points to a gate, then RIP = 64-bit displacement taken from gate; else RIP = 64-bit offset from far pointer referenced in the instruction. + + + Offset + NA + NA + NA + + + ModRM:r/m(r) + NA + NA + NA + + + + CBW/CWDE/CDQE--Convert Byte to Word/Convert Word to Doubleword/Convert Doubleword to Quadword. + + CBW + void + 98 + AX <-- sign-extend of AL. + + + CWDE + void + 98 + EAX <-- sign-extend of AX. + + + CDQE + void + REX.W + 98 + RAX <-- sign-extend of EAX. + + + NA + NA + NA + NA + + + + CLAC--Clear AC Flag in EFLAGS Register. + + CLAC + void + 0F 01 CA + Clear the AC flag in the EFLAGS register. + + + NA + NA + NA + NA + + + + CLC--Clear Carry Flag. + + CLC + void + F8 + Clear CF flag. + + + NA + NA + NA + NA + + + + CLD--Clear Direction Flag. + + CLD + void + FC + Clear DF flag. + + + NA + NA + NA + NA + + + + CLFLUSH--Flush Cache Line. + + CLFLUSH + m8 + 0F AE /7 + Flushes cache line containing m8. + + + ModRM:r/m(w) + NA + NA + NA + + + + CLI--Clear Interrupt Flag. + + CLI + void + FA + Clear interrupt flag; interrupts disabled when interrupt flag cleared. + + + NA + NA + NA + NA + + + + CLTS--Clear Task-Switched Flag in CR0. + + CLTS + void + 0F 06 + Clears TS flag in CR0. + + + NA + NA + NA + NA + + + + CMC--Complement Carry Flag. + + CMC + void + F5 + Complement CF flag. + + + NA + NA + NA + NA + + + + CMOVcc--Conditional Move. + + CMOVA + r16,r/m16 + 0F 47 /r + Move if above (CF=0 and ZF=0). + + + CMOVA + r32,r/m32 + 0F 47 /r + Move if above (CF=0 and ZF=0). + + + CMOVA + r64,r/m64 + REX.W + 0F 47 /r + Move if above (CF=0 and ZF=0). + + + CMOVAE + r16,r/m16 + 0F 43 /r + Move if above or equal (CF=0). + + + CMOVAE + r32,r/m32 + 0F 43 /r + Move if above or equal (CF=0). + + + CMOVAE + r64,r/m64 + REX.W + 0F 43 /r + Move if above or equal (CF=0). + + + CMOVB + r16,r/m16 + 0F 42 /r + Move if below (CF=1). + + + CMOVB + r32,r/m32 + 0F 42 /r + Move if below (CF=1). + + + CMOVB + r64,r/m64 + REX.W + 0F 42 /r + Move if below (CF=1). + + + CMOVBE + r16,r/m16 + 0F 46 /r + Move if below or equal (CF=1 or ZF=1). + + + CMOVBE + r32,r/m32 + 0F 46 /r + Move if below or equal (CF=1 or ZF=1). + + + CMOVBE + r64,r/m64 + REX.W + 0F 46 /r + Move if below or equal (CF=1 or ZF=1). + + + CMOVC + r16,r/m16 + 0F 42 /r + Move if carry (CF=1). + + + CMOVC + r32,r/m32 + 0F 42 /r + Move if carry (CF=1). + + + CMOVC + r64,r/m64 + REX.W + 0F 42 /r + Move if carry (CF=1). + + + CMOVE + r16,r/m16 + 0F 44 /r + Move if equal (ZF=1). + + + CMOVE + r32,r/m32 + 0F 44 /r + Move if equal (ZF=1). + + + CMOVE + r64,r/m64 + REX.W + 0F 44 /r + Move if equal (ZF=1). + + + CMOVG + r16,r/m16 + 0F 4F /r + Move if greater (ZF=0 and SF=OF). + + + CMOVG + r32,r/m32 + 0F 4F /r + Move if greater (ZF=0 and SF=OF). + + + CMOVG + r64,r/m64 + REX.W + 0F 4F /r + Move if greater (ZF=0 and SF=OF). + + + CMOVGE + r16,r/m16 + 0F 4D /r + Move if greater or equal (SF=OF). 
+ + + CMOVGE + r32,r/m32 + 0F 4D /r + Move if greater or equal (SF=OF). + + + CMOVGE + r64,r/m64 + REX.W + 0F 4D /r + Move if greater or equal (SF=OF). + + + CMOVL + r16,r/m16 + 0F 4C /r + Move if less (SF != OF). + + + CMOVL + r32,r/m32 + 0F 4C /r + Move if less (SF != OF). + + + CMOVL + r64,r/m64 + REX.W + 0F 4C /r + Move if less (SF != OF). + + + CMOVLE + r16,r/m16 + 0F 4E /r + Move if less or equal (ZF=1 or SF != OF). + + + CMOVLE + r32,r/m32 + 0F 4E /r + Move if less or equal (ZF=1 or SF != OF). + + + CMOVLE + r64,r/m64 + REX.W + 0F 4E /r + Move if less or equal (ZF=1 or SF != OF). + + + CMOVNA + r16,r/m16 + 0F 46 /r + Move if not above (CF=1 or ZF=1). + + + CMOVNA + r32,r/m32 + 0F 46 /r + Move if not above (CF=1 or ZF=1). + + + CMOVNA + r64,r/m64 + REX.W + 0F 46 /r + Move if not above (CF=1 or ZF=1). + + + CMOVNAE + r16,r/m16 + 0F 42 /r + Move if not above or equal (CF=1). + + + CMOVNAE + r32,r/m32 + 0F 42 /r + Move if not above or equal (CF=1). + + + CMOVNAE + r64,r/m64 + REX.W + 0F 42 /r + Move if not above or equal (CF=1). + + + CMOVNB + r16,r/m16 + 0F 43 /r + Move if not below (CF=0). + + + CMOVNB + r32,r/m32 + 0F 43 /r + Move if not below (CF=0). + + + CMOVNB + r64,r/m64 + REX.W + 0F 43 /r + Move if not below (CF=0). + + + CMOVNBE + r16,r/m16 + 0F 47 /r + Move if not below or equal (CF=0 and ZF=0). + + + CMOVNBE + r32,r/m32 + 0F 47 /r + Move if not below or equal (CF=0 and ZF=0). + + + CMOVNBE + r64,r/m64 + REX.W + 0F 47 /r + Move if not below or equal (CF=0 and ZF=0). + + + CMOVNC + r16,r/m16 + 0F 43 /r + Move if not carry (CF=0). + + + CMOVNC + r32,r/m32 + 0F 43 /r + Move if not carry (CF=0). + + + CMOVNC + r64,r/m64 + REX.W + 0F 43 /r + Move if not carry (CF=0). + + + CMOVNE + r16,r/m16 + 0F 45 /r + Move if not equal (ZF=0). + + + CMOVNE + r32,r/m32 + 0F 45 /r + Move if not equal (ZF=0). + + + CMOVNE + r64,r/m64 + REX.W + 0F 45 /r + Move if not equal (ZF=0). + + + CMOVNG + r16,r/m16 + 0F 4E /r + Move if not greater (ZF=1 or SF != OF). + + + CMOVNG + r32,r/m32 + 0F 4E /r + Move if not greater (ZF=1 or SF != OF). + + + CMOVNG + r64,r/m64 + REX.W + 0F 4E /r + Move if not greater (ZF=1 or SF != OF). + + + CMOVNGE + r16,r/m16 + 0F 4C /r + Move if not greater or equal (SF != OF). + + + CMOVNGE + r32,r/m32 + 0F 4C /r + Move if not greater or equal (SF != OF). + + + CMOVNGE + r64,r/m64 + REX.W + 0F 4C /r + Move if not greater or equal (SF != OF). + + + CMOVNL + r16,r/m16 + 0F 4D /r + Move if not less (SF=OF). + + + CMOVNL + r32,r/m32 + 0F 4D /r + Move if not less (SF=OF). + + + CMOVNL + r64,r/m64 + REX.W + 0F 4D /r + Move if not less (SF=OF). + + + CMOVNLE + r16,r/m16 + 0F 4F /r + Move if not less or equal (ZF=0 and SF=OF). + + + CMOVNLE + r32,r/m32 + 0F 4F /r + Move if not less or equal (ZF=0 and SF=OF). + + + CMOVNLE + r64,r/m64 + REX.W + 0F 4F /r + Move if not less or equal (ZF=0 and SF=OF). + + + CMOVNO + r16,r/m16 + 0F 41 /r + Move if not overflow (OF=0). + + + CMOVNO + r32,r/m32 + 0F 41 /r + Move if not overflow (OF=0). + + + CMOVNO + r64,r/m64 + REX.W + 0F 41 /r + Move if not overflow (OF=0). + + + CMOVNP + r16,r/m16 + 0F 4B /r + Move if not parity (PF=0). + + + CMOVNP + r32,r/m32 + 0F 4B /r + Move if not parity (PF=0). + + + CMOVNP + r64,r/m64 + REX.W + 0F 4B /r + Move if not parity (PF=0). + + + CMOVNS + r16,r/m16 + 0F 49 /r + Move if not sign (SF=0). + + + CMOVNS + r32,r/m32 + 0F 49 /r + Move if not sign (SF=0). + + + CMOVNS + r64,r/m64 + REX.W + 0F 49 /r + Move if not sign (SF=0). + + + CMOVNZ + r16,r/m16 + 0F 45 /r + Move if not zero (ZF=0). 
+ + + CMOVNZ + r32,r/m32 + 0F 45 /r + Move if not zero (ZF=0). + + + CMOVNZ + r64,r/m64 + REX.W + 0F 45 /r + Move if not zero (ZF=0). + + + CMOVO + r16,r/m16 + 0F 40 /r + Move if overflow (OF=1). + + + CMOVO + r32,r/m32 + 0F 40 /r + Move if overflow (OF=1). + + + CMOVO + r64,r/m64 + REX.W + 0F 40 /r + Move if overflow (OF=1). + + + CMOVP + r16,r/m16 + 0F 4A /r + Move if parity (PF=1). + + + CMOVP + r32,r/m32 + 0F 4A /r + Move if parity (PF=1). + + + CMOVP + r64,r/m64 + REX.W + 0F 4A /r + Move if parity (PF=1). + + + CMOVPE + r16,r/m16 + 0F 4A /r + Move if parity even (PF=1). + + + CMOVPE + r32,r/m32 + 0F 4A /r + Move if parity even (PF=1). + + + CMOVPE + r64,r/m64 + REX.W + 0F 4A /r + Move if parity even (PF=1). + + + CMOVPO + r16,r/m16 + 0F 4B /r + Move if parity odd (PF=0). + + + CMOVPO + r32,r/m32 + 0F 4B /r + Move if parity odd (PF=0). + + + CMOVPO + r64,r/m64 + REX.W + 0F 4B /r + Move if parity odd (PF=0). + + + CMOVS + r16,r/m16 + 0F 48 /r + Move if sign (SF=1). + + + CMOVS + r32,r/m32 + 0F 48 /r + Move if sign (SF=1). + + + CMOVS + r64,r/m64 + REX.W + 0F 48 /r + Move if sign (SF=1). + + + CMOVZ + r16,r/m16 + 0F 44 /r + Move if zero (ZF=1). + + + CMOVZ + r32,r/m32 + 0F 44 /r + Move if zero (ZF=1). + + + CMOVZ + r64,r/m64 + REX.W + 0F 44 /r + Move if zero (ZF=1). + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + CMP--Compare Two Operands. + + CMP + AL,imm8 + 3C ib + Compare imm8 with AL. + + + CMP + AX,imm16 + 3D iw + Compare imm16 with AX. + + + CMP + EAX,imm32 + 3D id + Compare imm32 with EAX. + + + CMP + RAX,imm32 + REX.W + 3D id + Compare imm32 sign-extended to 64-bits with RAX. + + + CMP + r/m8,imm8* + 80 /7 ib + Compare imm8 with r/m8. + + + CMP + r/m8,imm8 + REX + 80 /7 ib + Compare imm8 with r/m8. + + + CMP + r/m16,imm16 + 81 /7 iw + Compare imm16 with r/m16. + + + CMP + r/m32,imm32 + 81 /7 id + Compare imm32 with r/m32. + + + CMP + r/m64,imm32 + REX.W + 81 /7 id + Compare imm32 sign-extended to 64-bits with r/m64. + + + CMP + r/m16,imm8 + 83 /7 ib + Compare imm8 with r/m16. + + + CMP + r/m32,imm8 + 83 /7 ib + Compare imm8 with r/m32. + + + CMP + r/m64,imm8 + REX.W + 83 /7 ib + Compare imm8 with r/m64. + + + CMP + r/m8,r8** + 38 /r + Compare r8 with r/m8. + + + CMP + r/m8,r8 + REX + 38 /r + Compare r8 with r/m8. + + + CMP + r/m16,r16 + 39 /r + Compare r16 with r/m16. + + + CMP + r/m32,r32 + 39 /r + Compare r32 with r/m32. + + + CMP + r/m64,r64 + REX.W + 39 /r + Compare r64 with r/m64. + + + CMP + r8,r/m8** + 3A /r + Compare r/m8 with r8. + + + CMP + r8,r/m8 + REX + 3A /r + Compare r/m8 with r8. + + + CMP + r16,r/m16 + 3B /r + Compare r/m16 with r16. + + + CMP + r32,r/m32 + 3B /r + Compare r/m32 with r32. + + + CMP + r64,r/m64 + REX.W + 3B /r + Compare r/m64 with r64. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(r) + ModRM:reg(r) + NA + NA + + + ModRM:r/m(r) + imm8(r) + NA + NA + + + AL/AX/EAX/RAX(r) + imm8(r) + NA + NA + + + + CMPPD--Compare Packed Double-Precision Floating-Point Values. + + CMPPD + xmm1,xmm2/m128,imm8 + 66 0F C2 /r ib + + SSE2 + + Compare packed double-precision floatingpoint values in xmm2/m128 and xmm1 using imm8 as comparison predicate. + + + VCMPPD + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F.WIG C2 /r ib + + AVX + + Compare packed double-precision floatingpoint values in xmm3/m128 and xmm2 using bits 4:0 of imm8 as a comparison predicate. 
+ + + VCMPPD + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F.WIG C2 /r ib + + AVX + + Compare packed double-precision floatingpoint values in ymm3/m256 and ymm2 using bits 4:0 of imm8 as a comparison predicate. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + CMPPS--Compare Packed Single-Precision Floating-Point Values. + + CMPPS + xmm1,xmm2/m128,imm8 + 0F C2 /r ib + + SSE + + Compare packed single-precision floatingpoint values in xmm2/mem and xmm1 using imm8 as comparison predicate. + + + VCMPPS + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.0F.WIG C2 /r ib + + AVX + + Compare packed single-precision floatingpoint values in xmm3/m128 and xmm2 using bits 4:0 of imm8 as a comparison predicate. + + + VCMPPS + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.0F.WIG C2 /r ib + + AVX + + Compare packed single-precision floatingpoint values in ymm3/m256 and ymm2 using bits 4:0 of imm8 as a comparison predicate. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + CMPS/CMPSB/CMPSW/CMPSD/CMPSQ--Compare String Operands. + + CMPS + m8,m8 + A6 + For legacy mode, compare byte at address DS:(E)SI with byte at address ES:(E)DI; For 64byte at address (R|E)DI. The status flags are set accordingly. + + + CMPS + m16,m16 + A7 + For legacy mode, compare word at address DS:(E)SI with word at address ES:(E)DI; For 64word at address (R|E)DI. The status flags are set accordingly. + + + CMPS + m32,m32 + A7 + For legacy mode, compare dword at address DS:(E)SI at dword at address ES:(E)DI; For 64dword at address (R|E)DI. The status flags are set accordingly. + + + CMPS + m64,m64 + REX.W + A7 + Compares quadword at address (R|E)SI with quadword at address (R|E)DI and sets the status flags accordingly. + + + CMPSB + void + A6 + For legacy mode, compare byte at address DS:(E)SI with byte at address ES:(E)DI; For 64byte at address (R|E)DI. The status flags are set accordingly. + + + CMPSW + void + A7 + For legacy mode, compare word at address DS:(E)SI with word at address ES:(E)DI; For 64word at address (R|E)DI. The status flags are set accordingly. + + + CMPSD + void + A7 + For legacy mode, compare dword at address DS:(E)SI with dword at address ES:(E)DI; For 64-bit mode compare dword at address (R|E)SI with dword at address (R|E)DI. The status flags are set accordingly. + + + CMPSQ + void + REX.W + A7 + Compares quadword at address (R|E)SI with quadword at address (R|E)DI and sets the status flags accordingly. + + + NA + NA + NA + NA + + + + CMPSD--Compare Scalar Double-Precision Floating-Point Values. + + CMPSD + xmm1,xmm2/m64,imm8 + F2 0F C2 /r ib + + SSE2 + + Compare low double-precision floating-point value in xmm2/m64 and xmm1 using imm8 as comparison predicate. + + + VCMPSD + xmm1,xmm2,xmm3/m64,imm8 + VEX.NDS.LIG.F2.0F.WIG C2 /r ib + + AVX + + Compare low double precision floating-point value in xmm3/m64 and xmm2 using bits 4:0 of imm8 as comparison predicate. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + CMPSS--Compare Scalar Single-Precision Floating-Point Values. + + CMPSS + xmm1,xmm2/m32,imm8 + F3 0F C2 /r ib + + SSE + + Compare low single-precision floating-point value in xmm2/m32 and xmm1 using imm8 as comparison predicate. + + + VCMPSS + xmm1,xmm2,xmm3/m32,imm8 + VEX.NDS.LIG.F3.0F.WIG C2 /r ib + + AVX + + Compare low single precision floating-point value in xmm3/m32 and xmm2 using bits 4:0 of imm8 as comparison predicate. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + CMPXCHG--Compare and Exchange. + + CMPXCHG + r/m8,r8 + 0F B0/r + Compare AL with r/m8. If equal, ZF is set and r8 is loaded into r/m8. Else, clear ZF and load r/m8 into AL. + + + CMPXCHG + r/m8**,r8 + REX + 0F B0/r + Compare AL with r/m8. If equal, ZF is set and r8 is loaded into r/m8. Else, clear ZF and load r/m8 into AL. + + + CMPXCHG + r/m16,r16 + 0F B1/r + Compare AX with r/m16. If equal, ZF is set and r16 is loaded into r/m16. Else, clear ZF and load r/m16 into AX. + + + CMPXCHG + r/m32,r32 + 0F B1/r + Compare EAX with r/m32. If equal, ZF is set and r32 is loaded into r/m32. Else, clear ZF and load r/m32 into EAX. + + + CMPXCHG + r/m64,r64 + REX.W + 0F B1/r + Compare RAX with r/m64. If equal, ZF is set and r64 is loaded into r/m64. Else, clear ZF and load r/m64 into RAX. + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + + CMPXCHG8B/CMPXCHG16B--Compare and Exchange Bytes. + + CMPXCHG8B + m64 + 0F C7 / 1 m64 + Compare EDX:EAX with m64. If equal, set ZF and load ECX:EBX into m64. Else, clear ZF and load m64 into EDX:EAX. + + + CMPXCHG16B + m128 + REX.W + 0F C7 / 1 m128 + Compare RDX:RAX with m128. If equal, set ZF and load RCX:RBX into m128. Else, clear ZF and load m128 into RDX:RAX. + + + ModRM:r/m(r,w) + NA + NA + NA + + + + COMISD--Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS. + + COMISD + xmm1,xmm2/m64 + 66 0F 2F /r + + SSE2 + + Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + VCOMISD + xmm1,xmm2/m64 + VEX.LIG.66.0F.WIG 2F /r + + AVX + + Compare low double precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + + COMISS--Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS. + + COMISS + xmm1,xmm2/m32 + 0F 2F /r + + SSE + + Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + VCOMISS + xmm1,xmm2/m32 + VEX.LIG.0F.WIG 2F /r + + AVX + + Compare low single precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + + CPUID--CPU Identification. + + CPUID + void + 0F A2 + Returns processor identification and feature information to the EAX, EBX, ECX, and EDX registers, as determined by input entered in EAX (in some cases, ECX as well). + + + NA + NA + NA + NA + + + + CRC32--Accumulate CRC32 Value. + + CRC32 + r32,r/m8 + F2 0F 38 F0 /r + Accumulate CRC32 on r/m8. + + + CRC32 + r32,r/m8* + F2 REX 0F 38 F0 /r + Accumulate CRC32 on r/m8. + + + CRC32 + r32,r/m16 + F2 0F 38 F1 /r + Accumulate CRC32 on r/m16. + + + CRC32 + r32,r/m32 + F2 0F 38 F1 /r + Accumulate CRC32 on r/m32. + + + CRC32 + r64,r/m8 + F2 REX.W 0F 38 F0 /r + Accumulate CRC32 on r/m8. + + + CRC32 + r64,r/m64 + F2 REX.W 0F 38 F1 /r + Accumulate CRC32 on r/m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + CVTDQ2PD--Convert Packed Dword Integers to Packed Double-Precision FP Values. + + CVTDQ2PD + xmm1,xmm2/m64 + F3 0F E6 + + SSE2 + + Convert two packed signed doubleword integers from xmm2/m128 to two packed double-precision floating-point values in xmm1. + + + VCVTDQ2PD + xmm1,xmm2/m64 + VEX.128.F3.0F.WIG E6 /r + + AVX + + Convert two packed signed doubleword integers from xmm2/mem to two packed double-precision floating-point values in xmm1. 
+ + + VCVTDQ2PD + ymm1,xmm2/m128 + VEX.256.F3.0F.WIG E6 /r + + AVX + + Convert four packed signed doubleword integers from xmm2/mem to four packed double-precision floating-point values in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTDQ2PS--Convert Packed Dword Integers to Packed Single-Precision FP Values. + + CVTDQ2PS + xmm1,xmm2/m128 + 0F 5B /r + + SSE2 + + Convert four packed signed doubleword integers from xmm2/m128 to four packed single-precision floating-point values in xmm1. + + + VCVTDQ2PS + xmm1,xmm2/m128 + VEX.128.0F.WIG 5B /r + + AVX + + Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision floating-point values in xmm1. + + + VCVTDQ2PS + ymm1,ymm2/m256 + VEX.256.0F.WIG 5B /r + + AVX + + Convert eight packed signed doubleword integers from ymm2/mem to eight packed single-precision floating-point values in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPD2DQ--Convert Packed Double-Precision FP Values to Packed Dword Integers. + + CVTPD2DQ + xmm1,xmm2/m128 + F2 0F E6 /r + + SSE2 + + Convert two packed double-precision floatingpoint values from xmm2/m128 to two packed signed doubleword integers in xmm1. + + + VCVTPD2DQ + xmm1,xmm2/m128 + VEX.128.F2.0F.WIG E6 /r + + AVX + + Convert two packed double-precision floatingpoint values in xmm2/mem to two signed doubleword integers in xmm1. + + + VCVTPD2DQ + xmm1,ymm2/m256 + VEX.256.F2.0F.WIG E6 /r + + AVX + + Convert four packed double-precision floatingpoint values in ymm2/mem to four signed doubleword integers in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPD2PI--Convert Packed Double-Precision FP Values to Packed Dword Integers. + + CVTPD2PI + mm,xmm/m128 + 66 0F 2D /r + Convert two packed double-precision floatingpoint values from xmm/m128 to two packed signed doubleword integers in mm. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPD2PS--Convert Packed Double-Precision FP Values to Packed Single-Precision FP Values. + + CVTPD2PS + xmm1,xmm2/m128 + 66 0F 5A /r + + SSE2 + + Convert two packed double-precision floatingpoint values in xmm2/m128 to two packed single-precision floating-point values in xmm1. + + + VCVTPD2PS + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 5A /r + + AVX + + Convert two packed double-precision floatingpoint values in xmm2/mem to two singleprecision floating-point values in xmm1. + + + VCVTPD2PS + xmm1,ymm2/m256 + VEX.256.66.0F.WIG 5A /r + + AVX + + Convert four packed double-precision floatingpoint values in ymm2/mem to four singleprecision floating-point values in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPI2PD--Convert Packed Dword Integers to Packed Double-Precision FP Values. + + CVTPI2PD + xmm,mm/m64* + 66 0F 2A /r + Convert two packed signed doubleword integers from mm/mem64 to two packed double-precision floating-point values in xmm. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPI2PS--Convert Packed Dword Integers to Packed Single-Precision FP Values. + + CVTPI2PS + xmm,mm/m64 + 0F 2A /r + Convert two signed doubleword integers from mm/m64 to two single-precision floating-point values in xmm. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPS2DQ--Convert Packed Single-Precision FP Values to Packed Dword Integers. + + CVTPS2DQ + xmm1,xmm2/m128 + 66 0F 5B /r + + SSE2 + + Convert four packed single-precision floatingpoint values from xmm2/m128 to four packed signed doubleword integers in xmm1. 
+ + + VCVTPS2DQ + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 5B /r + + AVX + + Convert four packed single precision floatingpoint values from xmm2/mem to four packed signed doubleword values in xmm1. + + + VCVTPS2DQ + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 5B /r + + AVX + + Convert eight packed single precision floatingpoint values from ymm2/mem to eight packed signed doubleword values in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPS2PD--Convert Packed Single-Precision FP Values to Packed Double-Precision FP Values. + + CVTPS2PD + xmm1,xmm2/m64 + 0F 5A /r + + SSE2 + + Convert two packed single-precision floatingpoint values in xmm2/m64 to two packed double-precision floating-point values in xmm1. + + + VCVTPS2PD + xmm1,xmm2/m64 + VEX.128.0F.WIG 5A /r + + AVX + + Convert two packed single-precision floatingpoint values in xmm2/mem to two packed double-precision floating-point values in xmm1. + + + VCVTPS2PD + ymm1,xmm2/m128 + VEX.256.0F.WIG 5A /r + + AVX + + Convert four packed single-precision floatingpoint values in xmm2/mem to four packed double-precision floating-point values in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTPS2PI--Convert Packed Single-Precision FP Values to Packed Dword Integers. + + CVTPS2PI + mm,xmm/m64 + 0F 2D /r + Convert two packed single-precision floatingpoint values from xmm/m64 to two packed signed doubleword integers in mm. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTSD2SI--Convert Scalar Double-Precision FP Value to Integer. + + CVTSD2SI + r32,xmm/m64 + F2 0F 2D /r + + SSE2 + + Convert one double-precision floating-point value from xmm/m64 to one signed doubleword integer r32. + + + CVTSD2SI + r64,xmm/m64 + F2 REX.W 0F 2D /r + + SSE2 + + Convert one double-precision floating-point value from xmm/m64 to one signed quadword integer sign-extended into r64. + + + VCVTSD2SI + r32,xmm1/m64 + VEX.LIG.F2.0F.W0 2D /r + + AVX + + Convert one double precision floating-point value from xmm1/m64 to one signed doubleword integer r32. + + + VCVTSD2SI + r64,xmm1/m64 + VEX.LIG.F2.0F.W1 2D /r + + AVX + + Convert one double precision floating-point value from xmm1/m64 to one signed quadword integer sign-extended into r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTSD2SS--Convert Scalar Double-Precision FP Value to Scalar Single-Precision FP Value. + + CVTSD2SS + xmm1,xmm2/m64 + F2 0F 5A /r + + SSE2 + + Convert one double-precision floating-point value in xmm2/m64 to one single-precision floating-point value in xmm1. + + + VCVTSD2SS + xmm1,xmm2,xmm3/m64 + VEX.NDS.LIG.F2.0F.WIG 5A /r + + AVX + + Convert one double-precision floating-point value in xmm3/m64 to one single-precision floating-point value and merge with high bits in xmm2. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + CVTSI2SD--Convert Dword Integer to Scalar Double-Precision FP Value. + + CVTSI2SD + xmm,r/m32 + F2 0F 2A /r + + SSE2 + + Convert one signed doubleword integer from r/m32 to one double-precision floating-point value in xmm. + + + CVTSI2SD + xmm,r/m64 + F2 REX.W 0F 2A /r + + SSE2 + + Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm. + + + VCVTSI2SD + xmm1,xmm2,r/m32 + VEX.NDS.LIG.F2.0F.W0 2A /r + + AVX + + Convert one signed doubleword integer from r/m32 to one double-precision floating-point value in xmm1. 
+ + + VCVTSI2SD + xmm1,xmm2,r/m64 + VEX.NDS.LIG.F2.0F.W1 2A /r + + AVX + + Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + CVTSI2SS--Convert Dword Integer to Scalar Single-Precision FP Value. + + CVTSI2SS + xmm,r/m32 + F3 0F 2A /r + + SSE + + Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm. + + + CVTSI2SS + xmm,r/m64 + F3 REX.W 0F 2A /r + + SSE + + Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm. + + + VCVTSI2SS + xmm1,xmm2,r/m32 + VEX.NDS.LIG.F3.0F.W0 2A /r + + AVX + + Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1. + + + VCVTSI2SS + xmm1,xmm2,r/m64 + VEX.NDS.LIG.F3.0F.W1 2A /r + + AVX + + Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + CVTSS2SD--Convert Scalar Single-Precision FP Value to Scalar Double-Precision FP Value. + + CVTSS2SD + xmm1,xmm2/m32 + F3 0F 5A /r + + SSE2 + + Convert one single-precision floating-point value in xmm2/m32 to one double-precision floating-point value in xmm1. + + + VCVTSS2SD + xmm1,xmm2,xmm3/m32 + VEX.NDS.LIG.F3.0F.WIG 5A /r + + AVX + + Convert one single-precision floating-point value in xmm3/m32 to one double-precision floating-point value and merge with high bits of xmm2. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + CVTSS2SI--Convert Scalar Single-Precision FP Value to Dword Integer. + + CVTSS2SI + r32,xmm/m32 + F3 0F 2D /r + + SSE + + Convert one single-precision floating-point value from xmm/m32 to one signed doubleword integer in r32. + + + CVTSS2SI + r64,xmm/m32 + F3 REX.W 0F 2D /r + + SSE + + Convert one single-precision floating-point value from xmm/m32 to one signed quadword integer in r64. + + + VCVTSS2SI + r32,xmm1/m32 + VEX.LIG.F3.0F.W0 2D /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32. + + + VCVTSS2SI + r64,xmm1/m32 + VEX.LIG.F3.0F.W1 2D /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTPD2DQ--Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers. + + CVTTPD2DQ + xmm1,xmm2/m128 + 66 0F E6 /r + + SSE2 + + Convert two packed double-precision floatingpoint values from xmm2/m128 to two packed signed doubleword integers in xmm1 using truncation. + + + VCVTTPD2DQ + xmm1,xmm2/m128 + VEX.128.66.0F.WIG E6 /r + + AVX + + Convert two packed double-precision floatingpoint values in xmm2/mem to two signed doubleword integers in xmm1 using truncation. + + + VCVTTPD2DQ + xmm1,ymm2/m256 + VEX.256.66.0F.WIG E6 /r + + AVX + + Convert four packed double-precision floatingpoint values in ymm2/mem to four signed doubleword integers in xmm1 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTPD2PI--Convert with Truncation Packed Double-Precision FP Values to Packed Dword Integers. + + CVTTPD2PI + mm,xmm/m128 + 66 0F 2C /r + Convert two packer double-precision floatingpoint values from xmm/m128 to two packed signed doubleword integers in mm using truncation. 
+ + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTPS2DQ--Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers. + + CVTTPS2DQ + xmm1,xmm2/m128 + F3 0F 5B /r + + SSE2 + + Convert four single-precision floating-point values from xmm2/m128 to four signed doubleword integers in xmm1 using truncation. + + + VCVTTPS2DQ + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 5B /r + + AVX + + Convert four packed single precision floatingpoint values from xmm2/mem to four packed signed doubleword values in xmm1 using truncation. + + + VCVTTPS2DQ + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 5B /r + + AVX + + Convert eight packed single precision floatingpoint values from ymm2/mem to eight packed signed doubleword values in ymm1 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTPS2PI--Convert with Truncation Packed Single-Precision FP Values to Packed Dword Integers. + + CVTTPS2PI + mm,xmm/m64 + 0F 2C /r + Convert two single-precision floating-point values from xmm/m64 to two signed doubleword signed integers in mm using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTSD2SI--Convert with Truncation Scalar Double-Precision FP Value to Signed Integer. + + CVTTSD2SI + r32,xmm/m64 + F2 0F 2C /r + + SSE2 + + Convert one double-precision floating-point value from xmm/m64 to one signed doubleword integer in r32 using truncation. + + + CVTTSD2SI + r64,xmm/m64 + F2 REX.W 0F 2C /r + + SSE2 + + Convert one double precision floating-point value from xmm/m64 to one signedquadword integer in r64 using truncation. + + + VCVTTSD2SI + r32,xmm1/m64 + VEX.LIG.F2.0F.W0 2C /r + + AVX + + Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer in r32 using truncation. + + + VCVTTSD2SI + r64,xmm1/m64 + VEX.LIG.F2.0F.W1 2C /r + + AVX + + Convert one double precision floating-point value from xmm1/m64 to one signed quadword integer in r64 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CVTTSS2SI--Convert with Truncation Scalar Single-Precision FP Value to Dword Integer. + + CVTTSS2SI + r32,xmm/m32 + F3 0F 2C /r + + SSE + + Convert one single-precision floating-point value from xmm/m32 to one signed doubleword integer in r32 using truncation. + + + CVTTSS2SI + r64,xmm/m32 + F3 REX.W 0F 2C /r + + SSE + + Convert one single-precision floating-point value from xmm/m32 to one signed quadword integer in r64 using truncation. + + + VCVTTSS2SI + r32,xmm1/m32 + VEX.LIG.F3.0F.W0 2C /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32 using truncation. + + + VCVTTSS2SI + r64,xmm1/m32 + VEX.LIG.F3.0F.W1 2C /r + + AVX + + Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64 using truncation. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + CWD/CDQ/CQO--Convert Word to Doubleword/Convert Doubleword to Quadword. + + CWD + void + 99 + DX:AX <-- sign-extend of AX. + + + CDQ + void + 99 + EDX:EAX <-- sign-extend of EAX. + + + CQO + void + REX.W + 99 + RDX:RAX<-- sign-extend of RAX. + + + NA + NA + NA + NA + + + + DAA--Decimal Adjust AL after Addition. + + DAA + void + 27 + Decimal adjust AL after addition. + + + NA + NA + NA + NA + + + + DAS--Decimal Adjust AL after Subtraction. + + DAS + void + 2F + Decimal adjust AL after subtraction. + + + NA + NA + NA + NA + + + + DEC--Decrement by 1. + + DEC + r/m8* + FE /1 + Decrement r/m8 by 1. + + + DEC + r/m8 + REX + FE /1 + Decrement r/m8 by 1. 
+ + + DEC + r/m16 + FF /1 + Decrement r/m16 by 1. + + + DEC + r/m32 + FF /1 + Decrement r/m32 by 1. + + + DEC + r/m64 + REX.W + FF /1 + Decrement r/m64 by 1. + + + DEC + r16 + 48+rw + Decrement r16 by 1. + + + DEC + r32 + 48+rd + Decrement r32 by 1. + + + ModRM:r/m(r,w) + NA + NA + NA + + + opcode + rd(r,w) + NA + NA + NA + + + + DIV--Unsigned Divide. + + DIV + r/m8* + F6 /6 + Unsigned divide AX by r/m8, with result stored in AL <-- Quotient, AH ? Remainder. + + + DIV + r/m8 + REX + F6 /6 + Unsigned divide AX by r/m8, with result stored in AL <-- Quotient, AH ? Remainder. + + + DIV + r/m16 + F7 /6 + Unsigned divide DX:AX by r/m16, with result stored in AX <-- Quotient, DX ? Remainder. + + + DIV + r/m32 + F7 /6 + Unsigned divide EDX:EAX by r/m32, with result stored in EAX <-- Quotient, EDX ? Remainder. + + + DIV + r/m64 + REX.W + F7 /6 + Unsigned divide RDX:RAX by r/m64, with result stored in RAX <-- Quotient, RDX ? Remainder. + + + ModRM:r/m(w) + NA + NA + NA + + + + DIVPD--Divide Packed Double-Precision Floating-Point Values. + + DIVPD + xmm1,xmm2/m128 + 66 0F 5E /r + + SSE2 + + Divide packed double-precision floating-point values in xmm1 by packed double-precision floating-point values xmm2/m128. + + + VDIVPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5E /r + + AVX + + Divide packed double-precision floating-point values in xmm2 by packed double-precision floating-point values in xmm3/mem. + + + VDIVPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5E /r + + AVX + + Divide packed double-precision floating-point values in ymm2 by packed double-precision floating-point values in ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + DIVPS--Divide Packed Single-Precision Floating-Point Values. + + DIVPS + xmm1,xmm2/m128 + 0F 5E /r + + SSE + + Divide packed single-precision floating-point values in xmm1 by packed single-precision floating-point values xmm2/m128. + + + VDIVPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5E /r + + AVX + + Divide packed single-precision floating-point values in xmm2 by packed double-precision floating-point values in xmm3/mem. + + + VDIVPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5E /r + + AVX + + Divide packed single-precision floating-point values in ymm2 by packed double-precision floating-point values in ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + DIVSD--Divide Scalar Double-Precision Floating-Point Values. + + DIVSD + xmm1,xmm2/m64 + F2 0F 5E /r + + SSE2 + + Divide low double-precision floating-point value in xmm1 by low double-precision floating-point value in xmm2/mem64. + + + VDIVSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.LIG.F2.0F.WIG 5E /r + + AVX + + Divide low double-precision floating point values in xmm2 by low double precision floating-point value in xmm3/mem64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + DIVSS--Divide Scalar Single-Precision Floating-Point Values. + + DIVSS + xmm1,xmm2/m32 + F3 0F 5E /r + + SSE + + Divide low single-precision floating-point value in xmm1 by low single-precision floating-point value in xmm2/m32. + + + VDIVSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.LIG.F3.0F.WIG 5E /r + + AVX + + Divide low single-precision floating point value in xmm2 by low single precision floating-point value in xmm3/m32. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + DPPD--Dot Product of Packed Double Precision Floating-Point Values. + + DPPD + xmm1,xmm2/m128,imm8 + 66 0F 3A 41 /r ib + + SSE4_1 + + Selectively multiply packed DP floating-point values from xmm1 with packed DP floatingpoint values from xmm2, add and selectively store the packed DP floating-point values to xmm1. + + + VDPPD + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A.WIG 41 /r ib + + AVX + + Selectively multiply packed DP floating-point values from xmm2 with packed DP floatingpoint values from xmm3, add and selectively store the packed DP floating-point values to xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + DPPS--Dot Product of Packed Single Precision Floating-Point Values. + + DPPS + xmm1,xmm2/m128,imm8 + 66 0F 3A 40 /r ib + + SSE4_1 + + Selectively multiply packed SP floating-point values from xmm1 with packed SP floatingpoint values from xmm2, add and selectively store the packed SP floating-point values or zero values to xmm1. + + + VDPPS + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A.WIG 40 /r ib + + AVX + + Multiply packed SP floating point values from xmm1 with packed SP floating point values from xmm2/mem selectively add and store to xmm1. + + + VDPPS + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A.WIG 40 /r ib + + AVX + + Multiply packed single-precision floating-point values from ymm2 with packed SP floating point values from ymm3/mem, selectively add pairs of elements and store to ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + EMMS--Empty MMX Technology State. + + EMMS + void + 0F 77 + Set the x87 FPU tag word to empty. + + + NA + NA + NA + NA + + + + ENTER--Make Stack Frame for Procedure Parameters. + + ENTER + imm16,0 + C8 iw 00 + Create a stack frame for a procedure. + + + ENTER + imm16,1 + C8 iw 01 + Create a stack frame with a nested pointer for a procedure. + + + ENTER + imm16,imm8 + C8 iw ib + Create a stack frame with nested pointers for a procedure. + + + iw + imm8(r) + NA + NA + + + + EXTRACTPS--Extract Packed Single Precision Floating-Point Value. + + EXTRACTPS + reg/m32,xmm2,imm8 + 66 0F 3A 17 /r ib + + SSE4_1 + + Extract a single-precision floating-point value from xmm2 at the source offset specified by imm8 and store the result to reg or m32. The upper 32 bits of r64 is zeroed if reg is r64. + + + VEXTRACTPS + r/m32,xmm1,imm8 + VEX.128.66.0F3A.WIG 17 /r ib + + AVX + + Extract one single-precision floating-point value from xmm1 at the offset specified by imm8 and store the result in reg or m32. Zero extend the results in 64-bit register if applicable. + + + ModRM:r/m(w) + ModRM:reg(r) + imm8(r) + NA + + + + F2XM1--Compute 2 -1. + + F2XM1 + void + D9 F0 + Replace ST(0) with (2ST(0) 1). + + + + FABS--Absolute Value. + + FABS + void + D9 E1 + Replace ST with its absolute value. + + + + FADD/FADDP/FIADD--Add. + + FADD + m32fp + D8 /0 + Add m32fp to ST(0) and store result in ST(0). + + + FADD + m64fp + DC /0 + Add m64fp to ST(0) and store result in ST(0). + + + FADD + ST(0),ST(i) + D8 C0+i + Add ST(0) to ST(i) and store result in ST(0). + + + FADD + ST(i),ST(0) + DC C0+i + Add ST(i) to ST(0) and store result in ST(i). + + + FADDP + ST(i),ST(0) + DE C0+i + Add ST(0) to ST(i), store result in ST(i), and pop the register stack. 
+ + + FADDP + void + DE C1 + Add ST(0) to ST(1), store result in ST(1), and pop the register stack. + + + FIADD + m32int + DA /0 + Add m32int to ST(0) and store result in ST(0). + + + FIADD + m16int + DE /0 + Add m16int to ST(0) and store result in ST(0). + + + + FBLD--Load Binary Coded Decimal. + + FBLD + m80dec + DF /4 + Convert BCD value to floating-point and push onto the FPU stack. + + + + FBSTP--Store BCD Integer and Pop. + + FBSTP + m80bcd + DF /6 + Store ST(0) in m80bcd and pop ST(0). + + + + FCHS--Change Sign. + + FCHS + void + D9 E0 + Complements sign of ST(0). + + + + FCLEX/FNCLEX--Clear Exceptions. + + FCLEX* + void + 9B DB E2 + Clear floating-point exception flags after checking for pending unmasked floating-point exceptions. + + + FNCLEX + void + DB E2 + Clear floating-point exception flags without checking for pending unmasked floating-point exceptions. + + + + FCMOVcc--Floating-Point Conditional Move. + + FCMOVB + ST(0),ST(i) + DA C0+i + Move if below (CF=1). + + + FCMOVE + ST(0),ST(i) + DA C8+i + Move if equal (ZF=1). + + + FCMOVBE + ST(0),ST(i) + DA D0+i + Move if below or equal (CF=1 or ZF=1). + + + FCMOVU + ST(0),ST(i) + DA D8+i + Move if unordered (PF=1). + + + FCMOVNB + ST(0),ST(i) + DB C0+i + Move if not below (CF=0). + + + FCMOVNE + ST(0),ST(i) + DB C8+i + Move if not equal (ZF=0). + + + FCMOVNBE + ST(0),ST(i) + DB D0+i + Move if not below or equal (CF=0 and ZF=0). + + + FCMOVNU + ST(0),ST(i) + DB D8+i + Move if not unordered (PF=0). + + + + FCOM/FCOMP/FCOMPP--Compare Floating Point Values. + + FCOM + m32fp + D8 /2 + Compare ST(0) with m32fp. + + + FCOM + m64fp + DC /2 + Compare ST(0) with m64fp. + + + FCOM + ST(i) + D8 D0+i + Compare ST(0) with ST(i). + + + FCOM + void + D8 D1 + Compare ST(0) with ST(1). + + + FCOMP + m32fp + D8 /3 + Compare ST(0) with m32fp and pop register stack. + + + FCOMP + m64fp + DC /3 + Compare ST(0) with m64fp and pop register stack. + + + FCOMP + ST(i) + D8 D8+i + Compare ST(0) with ST(i) and pop register stack. + + + FCOMP + void + D8 D9 + Compare ST(0) with ST(1) and pop register stack. + + + FCOMPP + void + DE D9 + Compare ST(0) with ST(1) and pop register stack twice. + + + FCOMI + ST,ST(i) + DB F0+i + Compare ST(0) with ST(i) and set status flags accordingly. + + + FCOMIP + ST,ST(i) + DF F0+i + Compare ST(0) with ST(i), set status flags accordingly, and pop register stack. + + + FUCOMI + ST,ST(i) + DB E8+i + Compare ST(0) with ST(i), check for ordered values, and set status flags accordingly. + + + FUCOMIP + ST,ST(i) + DF E8+i + Compare ST(0) with ST(i), check for ordered values, set status flags accordingly, and pop register stack. + + + + FCOS--Cosine. + + FCOS + void + D9 FF + Replace ST(0) with its approximate cosine. + + + + FDECSTP--Decrement Stack-Top Pointer. + + FDECSTP + void + D9 F6 + Decrement TOP field in FPU status word. + + + + FDIV/FDIVP/FIDIV--Divide. + + FDIV + m32fp + D8 /6 + Divide ST(0) by m32fp and store result in ST(0). + + + FDIV + m64fp + DC /6 + Divide ST(0) by m64fp and store result in ST(0). + + + FDIV + ST(0),ST(i) + D8 F0+i + Divide ST(0) by ST(i) and store result in ST(0). + + + FDIV + ST(i),ST(0) + DC F8+i + Divide ST(i) by ST(0) and store result in ST(i). + + + FDIVP + ST(i),ST(0) + DE F8+i + Divide ST(i) by ST(0), store result in ST(i), and pop the register stack. + + + FDIVP + void + DE F9 + Divide ST(1) by ST(0), store result in ST(1), and pop the register stack. + + + FIDIV + m32int + DA /6 + Divide ST(0) by m32int and store result in ST(0). 
+ + + FIDIV + m16int + DE /6 + Divide ST(0) by m16int and store result in ST(0). + + + + FDIVR/FDIVRP/FIDIVR--Reverse Divide. + + FDIVR + m32fp + D8 /7 + Divide m32fp by ST(0) and store result in ST(0). + + + FDIVR + m64fp + DC /7 + Divide m64fp by ST(0) and store result in ST(0). + + + FDIVR + ST(0),ST(i) + D8 F8+i + Divide ST(i) by ST(0) and store result in ST(0). + + + FDIVR + ST(i),ST(0) + DC F0+i + Divide ST(0) by ST(i) and store result in ST(i). + + + FDIVRP + ST(i),ST(0) + DE F0+i + Divide ST(0) by ST(i), store result in ST(i), and pop the register stack. + + + FDIVRP + void + DE F1 + Divide ST(0) by ST(1), store result in ST(1), and pop the register stack. + + + FIDIVR + m32int + DA /7 + Divide m32int by ST(0) and store result in ST(0). + + + FIDIVR + m16int + DE /7 + Divide m16int by ST(0) and store result in ST(0). + + + + FFREE--Free Floating-Point Register. + + FFREE + ST(i) + DD C0+i + Sets tag for ST(i) to empty. + + + + FICOM/FICOMP--Compare Integer. + + FICOM + m16int + DE /2 + Compare ST(0) with m16int. + + + FICOM + m32int + DA /2 + Compare ST(0) with m32int. + + + FICOMP + m16int + DE /3 + Compare ST(0) with m16int and pop stack register. + + + FICOMP + m32int + DA /3 + Compare ST(0) with m32int and pop stack register. + + + + FILD--Load Integer. + + FILD + m16int + DF /0 + Push m16int onto the FPU register stack. + + + FILD + m32int + DB /0 + Push m32int onto the FPU register stack. + + + FILD + m64int + DF /5 + Push m64int onto the FPU register stack. + + + + FINCSTP--Increment Stack-Top Pointer. + + FINCSTP + void + D9 F7 + Increment the TOP field in the FPU status register. + + + + FINIT/FNINIT--Initialize Floating-Point Unit. + + FINIT* + void + 9B DB E3 + Initialize FPU after checking for pending unmasked floating-point exceptions. + + + FNINIT + void + DB E3 + Initialize FPU without checking for pending unmasked floating-point exceptions. + + + + FIST/FISTP--Store Integer. + + FIST + m16int + DF /2 + Store ST(0) in m16int. + + + FIST + m32int + DB /2 + Store ST(0) in m32int. + + + FISTP + m16int + DF /3 + Store ST(0) in m16int and pop register stack. + + + FISTP + m32int + DB /3 + Store ST(0) in m32int and pop register stack. + + + FISTP + m64int + DF /7 + Store ST(0) in m64int and pop register stack. + + + + FISTTP--Store Integer with Truncation. + + FISTTP + m16int + DF /1 + Store ST(0) in m16int with truncation. + + + FISTTP + m32int + DB /1 + Store ST(0) in m32int with truncation. + + + FISTTP + m64int + DD /1 + Store ST(0) in m64int with truncation. + + + + FLD--Load Floating Point Value. + + FLD + m32fp + D9 /0 + Push m32fp onto the FPU register stack. + + + FLD + m64fp + DD /0 + Push m64fp onto the FPU register stack. + + + FLD + m80fp + DB /5 + Push m80fp onto the FPU register stack. + + + FLD + ST(i) + D9 C0+i + Push ST(i) onto the FPU register stack. + + + + FLD1/FLDL2T/FLDL2E/FLDPI/FLDLG2/FLDLN2/FLDZ--Load Constant. + + FLD1 + void + D9 E8 + Push +1.0 onto the FPU register stack. + + + FLDL2T + void + D9 E9 + Push log210 onto the FPU register stack. + + + FLDL2E + void + D9 EA + Push log2e onto the FPU register stack. + + + FLDPI + void + D9 EB + Push p onto the FPU register stack. + + + FLDLG2 + void + D9 EC + Push log102 onto the FPU register stack. + + + FLDLN2 + void + D9 ED + Push loge2 onto the FPU register stack. + + + FLDZ + void + D9 EE + Push +0.0 onto the FPU register stack. + + + + FLDCW--Load x87 FPU Control Word. + + FLDCW + m2byte + D9 /5 + Load FPU control word from m2byte. + + + + FLDENV--Load x87 FPU Environment. 
+ + FLDENV + m14/28byte + D9 /4 + Load FPU environment from m14byte or m28byte. + + + + FMUL/FMULP/FIMUL--Multiply. + + FMUL + m32fp + D8 /1 + Multiply ST(0) by m32fp and store result in ST(0). + + + FMUL + m64fp + DC /1 + Multiply ST(0) by m64fp and store result in ST(0). + + + FMUL + ST(0),ST(i) + D8 C8+i + Multiply ST(0) by ST(i) and store result in ST(0). + + + FMUL + ST(i),ST(0) + DC C8+i + Multiply ST(i) by ST(0) and store result in ST(i). + + + FMULP + ST(i),ST(0) + DE C8+i + Multiply ST(i) by ST(0), store result in ST(i), and pop the register stack. + + + FMULP + void + DE C9 + Multiply ST(1) by ST(0), store result in ST(1), and pop the register stack. + + + FIMUL + m32int + DA /1 + Multiply ST(0) by m32int and store result in ST(0). + + + FIMUL + m16int + DE /1 + Multiply ST(0) by m16int and store result in ST(0). + + + + FNOP--No Operation. + + FNOP + void + D9 D0 + No operation is performed. + + + + FPATAN--Partial Arctangent. + + FPATAN + void + D9 F3 + Replace ST(1) with arctan(ST(1)/ST(0)) and pop the register stack. + + + + FPREM--Partial Remainder. + + FPREM + void + D9 F8 + Replace ST(0) with the remainder obtained from dividing ST(0) by ST(1). + + + + FPREM1--Partial Remainder. + + FPREM1 + void + D9 F5 + Replace ST(0) with the IEEE remainder obtained from dividing ST(0) by ST(1). + + + + FPTAN--Partial Tangent. + + FPTAN + void + D9 F2 + Replace ST(0) with its approximate tangent and push 1 onto the FPU stack. + + + + FRNDINT--Round to Integer. + + FRNDINT + void + D9 FC + Round ST(0) to an integer. + + + + FRSTOR--Restore x87 FPU State. + + FRSTOR + m94/108byte + DD /4 + Load FPU state from m94byte or m108byte. + + + + FSAVE/FNSAVE--Store x87 FPU State. + + FSAVE + m94/108byte* + 9B DD /6 + Store FPU state to m94byte or m108byte after checking for pending unmasked floating-point exceptions. Then re-initialize the FPU. + + + FNSAVE + m94/108byte + DD /6 + Store FPU environment to m94byte or m108byte without checking for pending unmasked floatingpoint exceptions. Then re-initialize the FPU. + + + + FSCALE--Scale. + + FSCALE + void + D9 FD + Scale ST(0) by ST(1). + + + + FSIN--Sine. + + FSIN + void + D9 FE + Replace ST(0) with the approximate of its sine. + + + + FSINCOS--Sine and Cosine. + + FSINCOS + void + D9 FB + Compute the sine and cosine of ST(0); replace ST(0) with the approximate sine, and push the approximate cosine onto the register stack. + + + + FSQRT--Square Root. + + FSQRT + void + D9 FA + Computes square root of ST(0) and stores the result in ST(0). + + + + FST/FSTP--Store Floating Point Value. + + FST + m32fp + D9 /2 + Copy ST(0) to m32fp. + + + FST + m64fp + DD /2 + Copy ST(0) to m64fp. + + + FST + ST(i) + DD D0+i + Copy ST(0) to ST(i). + + + FSTP + m32fp + D9 /3 + Copy ST(0) to m32fp and pop register stack. + + + FSTP + m64fp + DD /3 + Copy ST(0) to m64fp and pop register stack. + + + FSTP + m80fp + DB /7 + Copy ST(0) to m80fp and pop register stack. + + + FSTP + ST(i) + DD D8+i + Copy ST(0) to ST(i) and pop register stack. + + + + FSTCW/FNSTCW--Store x87 FPU Control Word. + + FSTCW + m2byte* + 9B D9 /7 + Store FPU control word to m2byte after checking for pending unmasked floating-point exceptions. + + + FNSTCW + m2byte + D9 /7 + Store FPU control word to m2byte without checking for pending unmasked floating-point exceptions. + + + + FSTENV/FNSTENV--Store x87 FPU Environment. + + FSTENV + m14/28byte* + 9B D9 /6 + Store FPU environment to m14byte or m28byte after checking for pending unmasked floating-point exceptions. 
Then mask all floating-point exceptions. + + + FNSTENV + m14/28byte + D9 /6 + Store FPU environment to m14byte or m28byte without checking for pending unmasked floatingpoint exceptions. Then mask all floatingpoint exceptions. + + + + FSTSW/FNSTSW--Store x87 FPU Status Word. + + FSTSW + m2byte + 9B DD /7 + Store FPU status word at m2byte after checking for pending unmasked floating-point exceptions. + + + FSTSW + AX* + 9B DF E0 + Store FPU status word in AX register after checking for pending unmasked floating-point exceptions. + + + FNSTSW + m2byte* + DD /7 + Store FPU status word at m2byte without checking for pending unmasked floating-point exceptions. + + + FNSTSW + AX + DF E0 + Store FPU status word in AX register without checking for pending unmasked floating-point exceptions. + + + + FSUB/FSUBP/FISUB--Subtract. + + FSUB + m32fp + D8 /4 + Subtract m32fp from ST(0) and store result in ST(0). + + + FSUB + m64fp + DC /4 + Subtract m64fp from ST(0) and store result in ST(0). + + + FSUB + ST(0),ST(i) + D8 E0+i + Subtract ST(i) from ST(0) and store result in ST(0). + + + FSUB + ST(i),ST(0) + DC E8+i + Subtract ST(0) from ST(i) and store result in ST(i). + + + FSUBP + ST(i),ST(0) + DE E8+i + Subtract ST(0) from ST(i), store result in ST(i), and pop register stack. + + + FSUBP + void + DE E9 + Subtract ST(0) from ST(1), store result in ST(1), and pop register stack. + + + FISUB + m32int + DA /4 + Subtract m32int from ST(0) and store result in ST(0). + + + FISUB + m16int + DE /4 + Subtract m16int from ST(0) and store result in ST(0). + + + + FSUBR/FSUBRP/FISUBR--Reverse Subtract. + + FSUBR + m32fp + D8 /5 + Subtract ST(0) from m32fp and store result in ST(0). + + + FSUBR + m64fp + DC /5 + Subtract ST(0) from m64fp and store result in ST(0). + + + FSUBR + ST(0),ST(i) + D8 E8+i + Subtract ST(0) from ST(i) and store result in ST(0). + + + FSUBR + ST(i),ST(0) + DC E0+i + Subtract ST(i) from ST(0) and store result in ST(i). + + + FSUBRP + ST(i),ST(0) + DE E0+i + Subtract ST(i) from ST(0), store result in ST(i), and pop register stack. + + + FSUBRP + void + DE E1 + Subtract ST(1) from ST(0), store result in ST(1), and pop register stack. + + + FISUBR + m32int + DA /5 + Subtract ST(0) from m32int and store result in ST(0). + + + FISUBR + m16int + DE /5 + Subtract ST(0) from m16int and store result in ST(0). + + + + FTST--TEST. + + FTST + void + D9 E4 + Compare ST(0) with 0.0. + + + + FUCOM/FUCOMP/FUCOMPP--Unordered Compare Floating Point Values. + + FUCOM + ST(i) + DD E0+i + Compare ST(0) with ST(i). + + + FUCOM + void + DD E1 + Compare ST(0) with ST(1). + + + FUCOMP + ST(i) + DD E8+i + Compare ST(0) with ST(i) and pop register stack. + + + FUCOMP + void + DD E9 + Compare ST(0) with ST(1) and pop register stack. + + + FUCOMPP + void + DA E9 + Compare ST(0) with ST(1) and pop register stack twice. + + + + FXAM--Examine ModR/M. + + FXAM + void + D9 E5 + Classify value or number in ST(0). + + + + FXCH--Exchange Register Contents. + + FXCH + ST(i) + D9 C8+i + Exchange the contents of ST(0) and ST(i). + + + FXCH + void + D9 C9 + Exchange the contents of ST(0) and ST(1). + + + + FXRSTOR--Restore x87 FPU, MMX, XMM, and MXCSR State. + + FXRSTOR + m512byte + 0F AE /1 + Restore the x87 FPU, MMX, XMM, and MXCSR register state from m512byte. + + + FXRSTOR64 + m512byte + REX.W+ 0F AE /1 + Restore the x87 FPU, MMX, XMM, and MXCSR register state from m512byte. + + + ModRM:r/m(r) + NA + NA + NA + + + + FXSAVE--Save x87 FPU, MMX Technology, and SSE State. 
+ + FXSAVE + m512byte + 0F AE /0 + Save the x87 FPU, MMX, XMM, and MXCSR register state to m512byte. + + + FXSAVE64 + m512byte + REX.W+ 0F AE /0 + Save the x87 FPU, MMX, XMM, and MXCSR register state to m512byte. + + + ModRM:r/m(w) + NA + NA + NA + + + + FXTRACT--Extract Exponent and Significand. + + FXTRACT + void + D9 F4 + Separate value in ST(0) into exponent and significand, store exponent in ST(0), and push the significand onto the register stack. + + + + FYL2X--Compute y * log x 2. + + FYL2X + void + D9 F1 + Replace ST(1) with (ST(1) * log2ST(0)) and pop the register stack. + + + + FYL2XP1--Compute y * log (x + 1) 2. + + FYL2XP1 + void + D9 F9 + Replace ST(1) with ST(1) * log2(ST(0) + 1.0) and pop the register stack. + + + + HADDPD--Packed Double-FP Horizontal Add. + + HADDPD + xmm1,xmm2/m128 + 66 0F 7C /r + + SSE3 + + Horizontal add packed double-precision floating-point values from xmm2/m128 to xmm1. + + + VHADDPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 7C /r + + AVX + + Horizontal add packed double-precision floating-point values from xmm2 and xmm3/mem. + + + VHADDPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 7C /r + + AVX + + Horizontal add packed double-precision floating-point values from ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + HADDPS--Packed Single-FP Horizontal Add. + + HADDPS + xmm1,xmm2/m128 + F2 0F 7C /r + + SSE3 + + Horizontal add packed single-precision floating-point values from xmm2/m128 to xmm1. + + + VHADDPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.F2.0F.WIG 7C /r + + AVX + + Horizontal add packed single-precision floating-point values from xmm2 and xmm3/mem. + + + VHADDPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.F2.0F.WIG 7C /r + + AVX + + Horizontal add packed single-precision floating-point values from ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + HLT--Halt. + + HLT + void + F4 + Halt. + + + NA + NA + NA + NA + + + + HSUBPD--Packed Double-FP Horizontal Subtract. + + HSUBPD + xmm1,xmm2/m128 + 66 0F 7D /r + + SSE3 + + Horizontal subtract packed double-precision floating-point values from xmm2/m128 to xmm1. + + + VHSUBPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 7D /r + + AVX + + Horizontal subtract packed double-precision floating-point values from xmm2 and xmm3/mem. + + + VHSUBPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 7D /r + + AVX + + Horizontal subtract packed double-precision floating-point values from ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + HSUBPS--Packed Single-FP Horizontal Subtract. + + HSUBPS + xmm1,xmm2/m128 + F2 0F 7D /r + + SSE3 + + Horizontal subtract packed single-precision floating-point values from xmm2/m128 to xmm1. + + + VHSUBPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.F2.0F.WIG 7D /r + + AVX + + Horizontal subtract packed single-precision floating-point values from xmm2 and xmm3/mem. + + + VHSUBPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.F2.0F.WIG 7D /r + + AVX + + Horizontal subtract packed single-precision floating-point values from ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + IDIV--Signed Divide. + + IDIV + r/m8 + F6 /7 + Signed divide AX by r/m8, with result stored in: AL <-- Quotient, AH ? Remainder. + + + IDIV + r/m8* + REX + F6 /7 + Signed divide AX by r/m8, with result stored in AL <-- Quotient, AH ? Remainder. 
+ + + IDIV + r/m16 + F7 /7 + Signed divide DX:AX by r/m16, with result stored in AX <-- Quotient, DX ? Remainder. + + + IDIV + r/m32 + F7 /7 + Signed divide EDX:EAX by r/m32, with result stored in EAX <-- Quotient, EDX ? Remainder. + + + IDIV + r/m64 + REX.W + F7 /7 + Signed divide RDX:RAX by r/m64, with result stored in RAX <-- Quotient, RDX ? Remainder. + + + ModRM:r/m(r) + NA + NA + NA + + + + IMUL--Signed Multiply. + + IMUL + r/m8* + F6 /5 + AX<-- AL * r/m byte. + + + IMUL + r/m16 + F7 /5 + DX:AX <-- AX * r/m word. + + + IMUL + r/m32 + F7 /5 + EDX:EAX <-- EAX * r/m32. + + + IMUL + r/m64 + REX.W + F7 /5 + RDX:RAX <-- RAX * r/m64. + + + IMUL + r16,r/m16 + 0F AF /r + word register <-- word register * r/m16. + + + IMUL + r32,r/m32 + 0F AF /r + doubleword register <-- doubleword register * r/m32. + + + IMUL + r64,r/m64 + REX.W + 0F AF /r + Quadword register <-- Quadword register * r/m64. + + + IMUL + r16,r/m16,imm8 + 6B /r ib + word register <-- r/m16 * sign-extended immediate byte. + + + IMUL + r32,r/m32,imm8 + 6B /r ib + doubleword register <-- r/m32 * signextended immediate byte. + + + IMUL + r64,r/m64,imm8 + REX.W + 6B /r ib + Quadword register <-- r/m64 * sign-extended immediate byte. + + + IMUL + r16,r/m16,imm16 + 69 /r iw + word register <-- r/m16 * immediate word. + + + IMUL + r32,r/m32,imm32 + 69 /r id + doubleword register <-- r/m32 * immediate doubleword. + + + IMUL + r64,r/m64,imm32 + REX.W + 69 /r id + Quadword register <-- r/m64 * immediate doubleword. + + + ModRM:r/m(r,w) + NA + NA + NA + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r)/16/32 + NA + + + + IN--Input from Port. + + IN + AL,imm8 + E4 ib + Input byte from imm8 I/O port address into AL. + + + IN + AX,imm8 + E5 ib + Input word from imm8 I/O port address into AX. + + + IN + EAX,imm8 + E5 ib + Input dword from imm8 I/O port address into EAX. + + + IN + AL,DX + EC + Input byte from I/O port in DX into AL. + + + IN + AX,DX + ED + Input word from I/O port in DX into AX. + + + IN + EAX,DX + ED + Input doubleword from I/O port in DX into EAX. + + + imm8(r) + NA + NA + NA + + + NA + NA + NA + NA + + + + INC--Increment by 1. + + INC + r/m8* + FE /0 + Increment r/m byte by 1. + + + INC + r/m8 + REX + FE /0 + Increment r/m byte by 1. + + + INC + r/m16 + FF /0 + Increment r/m word by 1. + + + INC + r/m32 + FF /0 + Increment r/m doubleword by 1. + + + INC + r/m64** + REX.W + FF /0 + Increment r/m quadword by 1. + + + INC + r16 + 40+ rw + Increment word register by 1. + + + INC + r32 + 40+ rd + Increment doubleword register by 1. + + + ModRM:r/m(r,w) + NA + NA + NA + + + opcode + rd(r,w) + NA + NA + NA + + + + INS/INSB/INSW/INSD--Input from Port to String. + + INS + m8,DX + 6C + Input byte from I/O port specified in DX into memory location specified in ES:(E)DI or RDI.*. + + + INS + m16,DX + 6D + Input word from I/O port specified in DX into memory location specified in ES:(E)DI or RDI.1. + + + INS + m32,DX + 6D + Input doubleword from I/O port specified in DX into memory location specified in ES:(E)DI or RDI.1. + + + INSB + void + 6C + Input byte from I/O port specified in DX into memory location specified with ES:(E)DI or RDI.1. + + + INSW + void + 6D + Input word from I/O port specified in DX into memory location specified in ES:(E)DI or RDI.1. + + + INSD + void + 6D + Input doubleword from I/O port specified in DX into memory location specified in ES:(E)DI or RDI.1. + + + NA + NA + NA + NA + + + + INSERTPS--Insert Packed Single Precision Floating-Point Value. 
+ + INSERTPS + xmm1,xmm2/m32,imm8 + 66 0F 3A 21 /r ib + + SSE4_1 + + Insert a single precision floating-point value selected by imm8 from xmm2/m32 into xmm1 at the specified destination element specified by imm8 and zero out destination elements in xmm1 as indicated in imm8. + + + VINSERTPS + xmm1,xmm2,xmm3/m32,imm8 + VEX.NDS.128.66.0F3A.WIG 21 /r ib + + AVX + + Insert a single precision floating point value selected by imm8 from xmm3/m32 and merge into xmm2 at the specified destination element specified by imm8 and zero out destination elements in xmm1 as indicated in imm8. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + INTn/INTO/INT3--Call to Interrupt Procedure. + + INT + 3 + CC + Interrupt 3--trap to debugger. + + + INT + imm8 + CD ib + Interrupt vector specified by immediate byte. + + + INTO + void + CE + Interrupt 4--if overflow flag is 1. + + + NA + NA + NA + NA + + + imm8(r) + NA + NA + NA + + + + INVD--Invalidate Internal Caches. + + INVD + void + 0F 08 + Flush internal caches; initiate flushing of external caches. + + + NA + NA + NA + NA + + + + INVLPG--Invalidate TLB Entries. + + INVLPG + m + 0F 01/7 + Invalidate TLB entries for page containing m. + + + ModRM:r/m(r) + NA + NA + NA + + + + INVPCID--Invalidate Process-Context Identifier. + + INVPCID + r32,m128 + 66 0F 38 82 /r + + INVPCID + + Invalidates entries in the TLBs and paging-structure caches based on invalidation type in r32 and descriptor in m128. + + + INVPCID + r64,m128 + 66 0F 38 82 /r + + INVPCID + + Invalidates entries in the TLBs and paging-structure caches based on invalidation type in r64 and descriptor in m128. + + + ModRM:reg(R) + ModRM:r/m(R) + NA + NA + + + + IRET/IRETD--Interrupt Return. + + IRET + void + CF + Interrupt return (16-bit operand size). + + + IRETD + void + CF + Interrupt return (32-bit operand size). + + + IRETQ + void + REX.W + CF + Interrupt return (64-bit operand size). + + + NA + NA + NA + NA + + + + Jcc--Jump if Condition Is Met. + + JA + rel8 + 77 cb + Jump short if above (CF=0 and ZF=0). + + + JAE + rel8 + 73 cb + Jump short if above or equal (CF=0). + + + JB + rel8 + 72 cb + Jump short if below (CF=1). + + + JBE + rel8 + 76 cb + Jump short if below or equal (CF=1 or ZF=1). + + + JC + rel8 + 72 cb + Jump short if carry (CF=1). + + + JCXZ + rel8 + E3 cb + Jump short if CX register is 0. + + + JECXZ + rel8 + E3 cb + Jump short if ECX register is 0. + + + JRCXZ + rel8 + E3 cb + Jump short if RCX register is 0. + + + JE + rel8 + 74 cb + Jump short if equal (ZF=1). + + + JG + rel8 + 7F cb + Jump short if greater (ZF=0 and SF=OF). + + + JGE + rel8 + 7D cb + Jump short if greater or equal (SF=OF). + + + JL + rel8 + 7C cb + Jump short if less (SF != OF). + + + JLE + rel8 + 7E cb + Jump short if less or equal (ZF=1 or SF != OF). + + + JNA + rel8 + 76 cb + Jump short if not above (CF=1 or ZF=1). + + + JNAE + rel8 + 72 cb + Jump short if not above or equal (CF=1). + + + JNB + rel8 + 73 cb + Jump short if not below (CF=0). + + + JNBE + rel8 + 77 cb + Jump short if not below or equal (CF=0 and ZF=0). + + + JNC + rel8 + 73 cb + Jump short if not carry (CF=0). + + + JNE + rel8 + 75 cb + Jump short if not equal (ZF=0). + + + JNG + rel8 + 7E cb + Jump short if not greater (ZF=1 or SF != OF). + + + JNGE + rel8 + 7C cb + Jump short if not greater or equal (SF != OF). + + + JNL + rel8 + 7D cb + Jump short if not less (SF=OF). + + + JNLE + rel8 + 7F cb + Jump short if not less or equal (ZF=0 and SF=OF). 
+ + + JNO + rel8 + 71 cb + Jump short if not overflow (OF=0). + + + JNP + rel8 + 7B cb + Jump short if not parity (PF=0). + + + JNS + rel8 + 79 cb + Jump short if not sign (SF=0). + + + JNZ + rel8 + 75 cb + Jump short if not zero (ZF=0). + + + JO + rel8 + 70 cb + Jump short if overflow (OF=1). + + + JP + rel8 + 7A cb + Jump short if parity (PF=1). + + + JPE + rel8 + 7A cb + Jump short if parity even (PF=1). + + + JPO + rel8 + 7B cb + Jump short if parity odd (PF=0). + + + JS + rel8 + 78 cb + Jump short if sign (SF=1). + + + JZ + rel8 + 74 cb + Jump short if zero (ZF = 1). + + + JA + rel16 + 0F 87 cw + Jump near if above (CF=0 and ZF=0). Not supported in 64-bit mode. + + + JA + rel32 + 0F 87 cd + Jump near if above (CF=0 and ZF=0). + + + JAE + rel16 + 0F 83 cw + Jump near if above or equal (CF=0). Not supported in 64-bit mode. + + + JAE + rel32 + 0F 83 cd + Jump near if above or equal (CF=0). + + + JB + rel16 + 0F 82 cw + Jump near if below (CF=1). Not supported in 64-bit mode. + + + JB + rel32 + 0F 82 cd + Jump near if below (CF=1). + + + JBE + rel16 + 0F 86 cw + Jump near if below or equal (CF=1 or ZF=1). Not supported in 64-bit mode. + + + JBE + rel32 + 0F 86 cd + Jump near if below or equal (CF=1 or ZF=1). + + + JC + rel16 + 0F 82 cw + Jump near if carry (CF=1). Not supported in 64-bit mode. + + + JC + rel32 + 0F 82 cd + Jump near if carry (CF=1). + + + JE + rel16 + 0F 84 cw + Jump near if equal (ZF=1). Not supported in 64-bit mode. + + + JE + rel32 + 0F 84 cd + Jump near if equal (ZF=1). + + + JZ + rel16 + 0F 84 cw + Jump near if 0 (ZF=1). Not supported in 64-bit mode. + + + JZ + rel32 + 0F 84 cd + Jump near if 0 (ZF=1). + + + JG + rel16 + 0F 8F cw + Jump near if greater (ZF=0 and SF=OF). Not supported in 64-bit mode. + + + JG + rel32 + 0F 8F cd + Jump near if greater (ZF=0 and SF=OF). + + + JGE + rel16 + 0F 8D cw + Jump near if greater or equal (SF=OF). Not supported in 64-bit mode. + + + JGE + rel32 + 0F 8D cd + Jump near if greater or equal (SF=OF). + + + JL + rel16 + 0F 8C cw + Jump near if less (SF != OF). Not supported in 64-bit mode. + + + JL + rel32 + 0F 8C cd + Jump near if less (SF != OF). + + + JLE + rel16 + 0F 8E cw + Jump near if less or equal (ZF=1 or SF != OF). Not supported in 64-bit mode. + + + JLE + rel32 + 0F 8E cd + Jump near if less or equal (ZF=1 or SF != OF). + + + JNA + rel16 + 0F 86 cw + Jump near if not above (CF=1 or ZF=1). Not supported in 64-bit mode. + + + JNA + rel32 + 0F 86 cd + Jump near if not above (CF=1 or ZF=1). + + + JNAE + rel16 + 0F 82 cw + Jump near if not above or equal (CF=1). Not supported in 64-bit mode. + + + JNAE + rel32 + 0F 82 cd + Jump near if not above or equal (CF=1). + + + JNB + rel16 + 0F 83 cw + Jump near if not below (CF=0). Not supported in 64-bit mode. + + + JNB + rel32 + 0F 83 cd + Jump near if not below (CF=0). + + + JNBE + rel16 + 0F 87 cw + Jump near if not below or equal (CF=0 and ZF=0). Not supported in 64-bit mode. + + + JNBE + rel32 + 0F 87 cd + Jump near if not below or equal (CF=0 and ZF=0). + + + JNC + rel16 + 0F 83 cw + Jump near if not carry (CF=0). Not supported in 64-bit mode. + + + JNC + rel32 + 0F 83 cd + Jump near if not carry (CF=0). + + + JNE + rel16 + 0F 85 cw + Jump near if not equal (ZF=0). Not supported in 64-bit mode. + + + JNE + rel32 + 0F 85 cd + Jump near if not equal (ZF=0). + + + JNG + rel16 + 0F 8E cw + Jump near if not greater (ZF=1 or SF != OF). Not supported in 64-bit mode. + + + JNG + rel32 + 0F 8E cd + Jump near if not greater (ZF=1 or SF != OF). 
+ + + JNGE + rel16 + 0F 8C cw + Jump near if not greater or equal (SF != OF). Not supported in 64-bit mode. + + + JNGE + rel32 + 0F 8C cd + Jump near if not greater or equal (SF != OF). + + + JNL + rel16 + 0F 8D cw + Jump near if not less (SF=OF). Not supported in 64-bit mode. + + + JNL + rel32 + 0F 8D cd + Jump near if not less (SF=OF). + + + JNLE + rel16 + 0F 8F cw + Jump near if not less or equal (ZF=0 and SF=OF). Not supported in 64-bit mode. + + + JNLE + rel32 + 0F 8F cd + Jump near if not less or equal (ZF=0 and SF=OF). + + + JNO + rel16 + 0F 81 cw + Jump near if not overflow (OF=0). Not supported in 64-bit mode. + + + JNO + rel32 + 0F 81 cd + Jump near if not overflow (OF=0). + + + JNP + rel16 + 0F 8B cw + Jump near if not parity (PF=0). Not supported in 64-bit mode. + + + JNP + rel32 + 0F 8B cd + Jump near if not parity (PF=0). + + + JNS + rel16 + 0F 89 cw + Jump near if not sign (SF=0). Not supported in 64-bit mode. + + + JNS + rel32 + 0F 89 cd + Jump near if not sign (SF=0). + + + JNZ + rel16 + 0F 85 cw + Jump near if not zero (ZF=0). Not supported in 64-bit mode. + + + JNZ + rel32 + 0F 85 cd + Jump near if not zero (ZF=0). + + + JO + rel16 + 0F 80 cw + Jump near if overflow (OF=1). Not supported in 64-bit mode. + + + JO + rel32 + 0F 80 cd + Jump near if overflow (OF=1). + + + JP + rel16 + 0F 8A cw + Jump near if parity (PF=1). Not supported in 64-bit mode. + + + JP + rel32 + 0F 8A cd + Jump near if parity (PF=1). + + + JPE + rel16 + 0F 8A cw + Jump near if parity even (PF=1). Not supported in 64-bit mode. + + + JPE + rel32 + 0F 8A cd + Jump near if parity even (PF=1). + + + JPO + rel16 + 0F 8B cw + Jump near if parity odd (PF=0). Not supported in 64-bit mode. + + + JPO + rel32 + 0F 8B cd + Jump near if parity odd (PF=0). + + + JS + rel16 + 0F 88 cw + Jump near if sign (SF=1). Not supported in 64. + + + JS + rel32 + 0F 88 cd + Jump near if sign (SF=1). + + + JZ + rel16 + 0F 84 cw + Jump near if 0 (ZF=1). Not supported in 64-bit mode. + + + JZ + rel32 + 0F 84 cd + Jump near if 0 (ZF=1). + + + Offset + NA + NA + NA + + + + JMP--Jump. + + JMP + rel8 + EB cb + Jump short, RIP = RIP + 8-bit displacement sign extended to 64-bits. + + + JMP + rel16 + E9 cw + Jump near, relative, displacement relative to next instruction. Not supported in 64-bit mode. + + + JMP + rel32 + E9 cd + Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64-bits. + + + JMP + r/m16 + FF /4 + Jump near, absolute indirect, address = zeroextended r/m16. Not supported in 64-bit mode. + + + JMP + r/m32 + FF /4 + Jump near, absolute indirect, address given in r/m32. Not supported in 64-bit mode. + + + JMP + r/m64 + FF /4 + Jump near, absolute indirect, RIP = 64-Bit offset from register or memory. + + + JMP + ptr16:16 + EA cd + Jump far, absolute, address given in operand. + + + JMP + ptr16:32 + EA cp + Jump far, absolute, address given in operand. + + + JMP + m16:16 + FF /5 + Jump far, absolute indirect, address given in m16:16. + + + JMP + m16:32 + FF /5 + Jump far, absolute indirect, address given in m16:32. + + + JMP + m16:64 + REX.W + FF /5 + Jump far, absolute indirect, address given in m16:64. + + + Offset + NA + NA + NA + + + ModRM:r/m(r) + NA + NA + NA + + + + LAHF--Load Status Flags into AH Register. + + LAHF + void + 9F + Load: AH <-- EFLAGS(SF:ZF:0:AF:0:PF:1:CF). + + + NA + NA + NA + NA + + + + LAR--Load Access Rights Byte. + + LAR + r16,r16/m16 + 0F 02 /r + r16 <-- access rights referenced by r16/m16. + + + LAR + reg,r32/m16 1 + 0F 02 /r + reg <-- access rights referenced by r32/m16. 
+ + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + LDDQU--Load Unaligned Integer 128 Bits. + + LDDQU + xmm1,mem + F2 0F F0 /r + + SSE3 + + Load unaligned data from mem and return double quadword in xmm1. + + + VLDDQU + xmm1,m128 + VEX.128.F2.0F.WIG F0 /r + + AVX + + Load unaligned packed integer values from mem to xmm1. + + + VLDDQU + ymm1,m256 + VEX.256.F2.0F.WIG F0 /r + + AVX + + Load unaligned packed integer values from mem to ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + LDMXCSR--Load MXCSR Register. + + LDMXCSR + m32 + 0F,AE,/2 + + SSE + + Load MXCSR register from m32. + + + VLDMXCSR + m32 + VEX.LZ.0F.WIG AE /2 + + AVX + + Load MXCSR register from m32. + + + ModRM:r/m(r) + NA + NA + NA + + + + LDS/LES/LFS/LGS/LSS--Load Far Pointer. + + LDS + r16,m16:16 + C5 /r + Load DS:r16 with far pointer from memory. + + + LDS + r32,m16:32 + C5 /r + Load DS:r32 with far pointer from memory. + + + LSS + r16,m16:16 + 0F B2 /r + Load SS:r16 with far pointer from memory. + + + LSS + r32,m16:32 + 0F B2 /r + Load SS:r32 with far pointer from memory. + + + LSS + r64,m16:64 + REX + 0F B2 /r + Load SS:r64 with far pointer from memory. + + + LES + r16,m16:16 + C4 /r + Load ES:r16 with far pointer from memory. + + + LES + r32,m16:32 + C4 /r + Load ES:r32 with far pointer from memory. + + + LFS + r16,m16:16 + 0F B4 /r + Load FS:r16 with far pointer from memory. + + + LFS + r32,m16:32 + 0F B4 /r + Load FS:r32 with far pointer from memory. + + + LFS + r64,m16:64 + REX + 0F B4 /r + Load FS:r64 with far pointer from memory. + + + LGS + r16,m16:16 + 0F B5 /r + Load GS:r16 with far pointer from memory. + + + LGS + r32,m16:32 + 0F B5 /r + Load GS:r32 with far pointer from memory. + + + LGS + r64,m16:64 + REX + 0F B5 /r + Load GS:r64 with far pointer from memory. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + LEA--Load Effective Address. + + LEA + r16,m + 8D /r + Store effective address for m in register r16. + + + LEA + r32,m + 8D /r + Store effective address for m in register r32. + + + LEA + r64,m + REX.W + 8D /r + Store effective address for m in register r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + LEAVE--High Level Procedure Exit. + + LEAVE + void + C9 + Set SP to BP, then pop BP. + + + LEAVE + void + C9 + Set ESP to EBP, then pop EBP. + + + LEAVE + void + C9 + Set RSP to RBP, then pop RBP. + + + NA + NA + NA + NA + + + + LFENCE--Load Fence. + + LFENCE + void + 0F AE E8 + Serializes load operations. + + + NA + NA + NA + NA + + + + LGDT/LIDT--Load Global/Interrupt Descriptor Table Register. + + LGDT + m16&32 + 0F 01 /2 + Load m into GDTR. + + + LIDT + m16&32 + 0F 01 /3 + Load m into IDTR. + + + LGDT + m16&64 + 0F 01 /2 + Load m into GDTR. + + + LIDT + m16&64 + 0F 01 /3 + Load m into IDTR. + + + ModRM:r/m(r) + NA + NA + NA + + + + LLDT--Load Local Descriptor Table Register. + + LLDT + r/m16 + 0F 00 /2 + Load segment selector r/m16 into LDTR. + + + ModRM:r/m(r) + NA + NA + NA + + + + LMSW--Load Machine Status Word. + + LMSW + r/m16 + 0F 01 /6 + Loads r/m 16 in machine status word of CR0. + + + ModRM:r/m(r) + NA + NA + NA + + + + LOCK--Assert LOCK# Signal Prefix. + + LOCK + void + F0 + + #Asserts LOCK + + signal for duration of the accompanying instruction. + + + NA + NA + NA + NA + + + + LODS/LODSB/LODSW/LODSD/LODSQ--Load String. + + LODS + m8 + AC + For legacy mode, Load byte at address DS:(E)SI into AL. For 64-bit mode load byte at address (R)SI into AL. + + + LODS + m16 + AD + For legacy mode, Load word at address DS:(E)SI into AX. For 64-bit mode load word at address (R)SI into AX. 
+ + + LODS + m32 + AD + For legacy mode, Load dword at address DS:(E)SI into EAX. For 64-bit mode load dword at address (R)SI into EAX. + + + LODS + m64 + REX.W + AD + Load qword at address (R)SI into RAX. + + + LODSB + void + AC + For legacy mode, Load byte at address DS:(E)SI into AL. For 64-bit mode load byte at address (R)SI into AL. + + + LODSW + void + AD + For legacy mode, Load word at address DS:(E)SI into AX. For 64-bit mode load word at address (R)SI into AX. + + + LODSD + void + AD + For legacy mode, Load dword at address DS:(E)SI into EAX. For 64-bit mode load dword at address (R)SI into EAX. + + + LODSQ + void + REX.W + AD + Load qword at address (R)SI into RAX. + + + NA + NA + NA + NA + + + + LOOP/LOOPcc--Loop According to ECX Counter. + + LOOP + rel8 + E2 cb + Decrement count; jump short if count != 0. + + + LOOPE + rel8 + E1 cb + Decrement count; jump short if count != 0 and ZF = 1. + + + LOOPNE + rel8 + E0 cb + Decrement count; jump short if count != 0 and ZF = 0. + + + Offset + NA + NA + NA + + + + LSL--Load Segment Limit. + + LSL + r16,r16/m16* + 0F 03 /r + Load: r16 <-- segment limit, selector r16/m16. + + + LSL + r32,r32/m16* + 0F 03 /r + Load: r32 <-- segment limit, selector r32/m16. + + + LSL + r64,r32/m16 + REX.W + 0F 03 /r + Load: r64 <-- segment limit, selector r32/m16. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + LTR--Load Task Register. + + LTR + r/m16 + 0F 00 /3 + Load r/m16 into task register. + + + ModRM:r/m(r) + NA + NA + NA + + + + LZCNT--Count the Number of Leading Zero Bits. + + LZCNT + r16,r/m16 + F3 0F BD /r + + LZCNT + + Count the number of leading zero bits in r/m16, return result in r16. + + + LZCNT + r32,r/m32 + F3 0F BD /r + + LZCNT + + Count the number of leading zero bits in r/m32, return result in r32. + + + LZCNT + r64,r/m64 + F3 REX.W 0F BD /r + + LZCNT + + Count the number of leading zero bits in r/m64, return result in r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MASKMOVDQU--Store Selected Bytes of Double Quadword. + + MASKMOVDQU + xmm1,xmm2 + 66 0F F7 /r + + SSE2 + + Selectively write bytes from xmm1 to memory location using the byte mask in xmm2. The default memory location is specified by DS:DI/EDI/RDI. + + + VMASKMOVDQU + xmm1,xmm2 + VEX.128.66.0F.WIG F7 /r + + AVX + + Selectively write bytes from xmm1 to memory location using the byte mask in xmm2. The default memory location is specified by DS:DI/EDI/RDI. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + + MASKMOVQ--Store Selected Bytes of Quadword. + + MASKMOVQ + mm1,mm2 + 0F F7 /r + Selectively write bytes from mm1 to memory location using the byte mask in mm2. The default memory location is specified by DS:DI/EDI/RDI. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + + MAXPD--Return Maximum Packed Double-Precision Floating-Point Values. + + MAXPD + xmm1,xmm2/m128 + 66 0F 5F /r + + SSE2 + + Return the maximum double-precision floating-point values between xmm2/m128 and xmm1. + + + VMAXPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5F /r + + AVX + + Return the maximum double-precision floating-point values between xmm2 and xmm3/mem. + + + VMAXPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5F /r + + AVX + + Return the maximum packed double-precision floating-point values between ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MAXPS--Return Maximum Packed Single-Precision Floating-Point Values. 
+ + MAXPS + xmm1,xmm2/m128 + 0F 5F /r + + SSE + + Return the maximum single-precision floatingpoint values between xmm2/m128 and xmm1. + + + VMAXPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5F /r + + AVX + + Return the maximum single-precision floatingpoint values between xmm2 and xmm3/mem. + + + VMAXPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5F /r + + AVX + + Return the maximum single double-precision floating-point values between ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MAXSD--Return Maximum Scalar Double-Precision Floating-Point Value. + + MAXSD + xmm1,xmm2/m64 + F2 0F 5F /r + + SSE2 + + Return the maximum scalar double-precision floating-point value between xmm2/mem64 and xmm1. + + + VMAXSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.LIG.F2.0F.WIG 5F /r + + AVX + + Return the maximum scalar double-precision floating-point value between xmm3/mem64 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MAXSS--Return Maximum Scalar Single-Precision Floating-Point Value. + + MAXSS + xmm1,xmm2/m32 + F3 0F 5F /r + + SSE + + Return the maximum scalar single-precision floating-point value between xmm2/mem32 and xmm1. + + + VMAXSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.LIG.F3.0F.WIG 5F /r + + AVX + + Return the maximum scalar single-precision floating-point value between xmm3/mem32 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MFENCE--Memory Fence. + + MFENCE + void + 0F AE F0 + Serializes load and store operations. + + + NA + NA + NA + NA + + + + MINPD--Return Minimum Packed Double-Precision Floating-Point Values. + + MINPD + xmm1,xmm2/m128 + 66 0F 5D /r + + SSE2 + + Return the minimum double-precision floating-point values between xmm2/m128 and xmm1. + + + VMINPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5D /r + + AVX + + Return the minimum double-precision floatingpoint values between xmm2 and xmm3/mem. + + + VMINPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5D /r + + AVX + + Return the minimum packed double-precision floating-point values between ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MINPS--Return Minimum Packed Single-Precision Floating-Point Values. + + MINPS + xmm1,xmm2/m128 + 0F 5D /r + + SSE + + Return the minimum single-precision floatingpoint values between xmm2/m128 and xmm1. + + + VMINPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5D /r + + AVX + + Return the minimum single-precision floatingpoint values between xmm2 and xmm3/mem. + + + VMINPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5D /r + + AVX + + Return the minimum single double-precision floating-point values between ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MINSD--Return Minimum Scalar Double-Precision Floating-Point Value. + + MINSD + xmm1,xmm2/m64 + F2 0F 5D /r + + SSE2 + + Return the minimum scalar double-precision floating-point value between xmm2/mem64 and xmm1. + + + VMINSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.LIG.F2.0F.WIG 5D /r + + AVX + + Return the minimum scalar double precision floating-point value between xmm3/mem64 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MINSS--Return Minimum Scalar Single-Precision Floating-Point Value. 
+ + MINSS + xmm1,xmm2/m32 + F3 0F 5D /r + + SSE + + Return the minimum scalar single-precision floating-point value between xmm2/mem32 and xmm1. + + + VMINSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.LIG.F3.0F.WIG 5D /r + + AVX + + Return the minimum scalar single precision floating-point value between xmm3/mem32 and xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MONITOR--Set Up Monitor Address. + + MONITOR + void + 0F 01 C8 + Sets up a linear address range to be monitored by hardware and activates the monitor. The address range should be a writeback memory caching type. The address is DS:EAX (DS:RAX in 64-bit mode). + + + NA + NA + NA + NA + + + + MOV--Move. + + MOV + r/m8,r8 + 88 /r + Move r8 to r/m8. + + + MOV + r/m8***,r8*** + REX + 88 /r + Move r8 to r/m8. + + + MOV + r/m16,r16 + 89 /r + Move r16 to r/m16. + + + MOV + r/m32,r32 + 89 /r + Move r32 to r/m32. + + + MOV + r/m64,r64 + REX.W + 89 /r + Move r64 to r/m64. + + + MOV + r8,r/m8 + 8A /r + Move r/m8 to r8. + + + MOV + r8***,r/m8*** + REX + 8A /r + Move r/m8 to r8. + + + MOV + r16,r/m16 + 8B /r + Move r/m16 to r16. + + + MOV + r32,r/m32 + 8B /r + Move r/m32 to r32. + + + MOV + r64,r/m64 + REX.W + 8B /r + Move r/m64 to r64. + + + MOV + r/m16,Sreg** + 8C /r + Move segment register to r/m16. + + + MOV + r/m64,Sreg** + REX.W + 8C /r + Move zero extended 16-bit segment register to r/m64. + + + MOV + Sreg,r/m16** + 8E /r + Move r/m16 to segment register. + + + MOV + Sreg,r/m64** + REX.W + 8E /r + Move lower 16 bits of r/m64 to segment register. + + + MOV + AL,moffs8* + A0 + Move byte at (seg:offset) to AL. + + + MOV + AL,moffs8* + REX.W + A0 + Move byte at (offset) to AL. + + + MOV + AX,moffs16* + A1 + Move word at (seg:offset) to AX. + + + MOV + EAX,moffs32* + A1 + Move doubleword at (seg:offset) to EAX. + + + MOV + RAX,moffs64* + REX.W + A1 + Move quadword at (offset) to RAX. + + + MOV + moffs8,AL*** + A2 + Move AL to (seg:offset). + + + MOV + moffs8,AL + REX.W + A2 + Move AL to (offset). + + + MOV + moffs16*,AX + A3 + Move AX to (seg:offset). + + + MOV + moffs32*,EAX + A3 + Move EAX to (seg:offset). + + + MOV + moffs64*,RAX + REX.W + A3 + Move RAX to (offset). + + + MOV + r8,imm8*** + B0+ rb ib + Move imm8 to r8. + + + MOV + r8,imm8 + REX + B0+ rb ib + Move imm8 to r8. + + + MOV + r16,imm16 + B8+ rw iw + Move imm16 to r16. + + + MOV + r32,imm32 + B8+ rd id + Move imm32 to r32. + + + MOV + r64,imm64 + REX.W + B8+ rd io + Move imm64 to r64. + + + MOV + r/m8,imm8 + C6 /0 ib + Move imm8 to r/m8. + + + MOV + r/m8***,imm8 + REX + C6 /0 ib + Move imm8 to r/m8. + + + MOV + r/m16,imm16 + C7 /0 iw + Move imm16 to r/m16. + + + MOV + r/m32,imm32 + C7 /0 id + Move imm32 to r/m32. + + + MOV + r/m64,imm32 + REX.W + C7 /0 io + Move imm32 sign extended to 64-bits to r/m64. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + AL/AX/EAX/RAX + Moffs + NA + NA + + + Moffs(w) + AL/AX/EAX/RAX + NA + NA + + + opcode + rd(w) + imm8(r)/16/32/64 + NA + NA + + + ModRM:r/m(w) + imm8(r)/16/32/64 + NA + NA + + + + MOV--Move to/from Control Registers. + + MOV + r32,CR0-CR7 + 0F 20/r + Move control register to r32. + + + MOV + r64,CR0-CR7 + 0F 20/r + Move extended control register to r64. 1. + + + MOV + r64,CR8 + REX.R + 0F 20 /0 + Move extended CR8 to r64. + + + MOV + CR0-CR7,r32 + 0F 22 /r + Move r32 to control register. + + + MOV + CR0-CR7,r64 + 0F 22 /r + Move r64 to extended control register. 1. + + + MOV + CR8,r64 + REX.R + 0F 22 /0 + Move r64 to extended CR8. 
+ + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOV--Move to/from Debug Registers. + + MOV + r32,DR0-DR7 + 0F 21/r + Move debug register to r32. + + + MOV + r64,DR0-DR7 + 0F 21/r + Move extended debug register to r64. + + + MOV + DR0-DR7,r32 + 0F 23 /r + Move r32 to debug register. + + + MOV + DR0-DR7,r64 + 0F 23 /r + Move r64 to extended debug register. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVAPD--Move Aligned Packed Double-Precision Floating-Point Values. + + MOVAPD + xmm1,xmm2/m128 + 66 0F 28 /r + + SSE2 + + Move packed double-precision floating-point values from xmm2/m128 to xmm1. + + + MOVAPD + xmm2/m128,xmm1 + 66 0F 29 /r + + SSE2 + + Move packed double-precision floating-point values from xmm1 to xmm2/m128. + + + VMOVAPD + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 28 /r + + AVX + + Move aligned packed double-precision floatingpoint values from xmm2/mem to xmm1. + + + VMOVAPD + xmm2/m128,xmm1 + VEX.128.66.0F.WIG 29 /r + + AVX + + Move aligned packed double-precision floatingpoint values from xmm1 to xmm2/mem. + + + VMOVAPD + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 28 /r + + AVX + + Move aligned packed double-precision floatingpoint values from ymm2/mem to ymm1. + + + VMOVAPD + ymm2/m256,ymm1 + VEX.256.66.0F.WIG 29 /r + + AVX + + Move aligned packed double-precision floatingpoint values from ymm1 to ymm2/mem. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVAPS--Move Aligned Packed Single-Precision Floating-Point Values. + + MOVAPS + xmm1,xmm2/m128 + 0F 28 /r + + SSE + + Move packed single-precision floating-point values from xmm2/m128 to xmm1. + + + MOVAPS + xmm2/m128,xmm1 + 0F 29 /r + + SSE + + Move packed single-precision floating-point values from xmm1 to xmm2/m128. + + + VMOVAPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 28 /r + + AVX + + Move aligned packed single-precision floatingpoint values from xmm2/mem to xmm1. + + + VMOVAPS + xmm2/m128,xmm1 + VEX.128.0F.WIG 29 /r + + AVX + + Move aligned packed single-precision floatingpoint values from xmm1 to xmm2/mem. + + + VMOVAPS + ymm1,ymm2/m256 + VEX.256.0F.WIG 28 /r + + AVX + + Move aligned packed single-precision floatingpoint values from ymm2/mem to ymm1. + + + VMOVAPS + ymm2/m256,ymm1 + VEX.256.0F.WIG 29 /r + + AVX + + Move aligned packed single-precision floatingpoint values from ymm1 to ymm2/mem. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVBE--Move Data After Swapping Bytes. + + MOVBE + r16,m16 + 0F 38 F0 /r + Reverse byte order in m16 and move to r16. + + + MOVBE + r32,m32 + 0F 38 F0 /r + Reverse byte order in m32 and move to r32. + + + MOVBE + r64,m64 + REX.W + 0F 38 F0 /r + Reverse byte order in m64 and move to r64. + + + MOVBE + m16,r16 + 0F 38 F1 /r + Reverse byte order in r16 and move to m16. + + + MOVBE + m32,r32 + 0F 38 F1 /r + Reverse byte order in r32 and move to m32. + + + MOVBE + m64,r64 + REX.W + 0F 38 F1 /r + Reverse byte order in r64 and move to m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVD/MOVQ--Move Doubleword/Move Quadword. + + MOVD + mm,r/m32 + 0F 6E /r + + MMX + + Move doubleword from r/m32 to mm. + + + MOVQ + mm,r/m64 + REX.W + 0F 6E /r + + MMX + + Move quadword from r/m64 to mm. + + + MOVD + r/m32,mm + 0F 7E /r + + MMX + + Move doubleword from mm to r/m32. + + + MOVQ + r/m64,mm + REX.W + 0F 7E /r + + MMX + + Move quadword from mm to r/m64. 
+ + + VMOVD + xmm1,r32/m32 + VEX.128.66.0F.W0 6E / + + AVX + + Move doubleword from r/m32 to xmm1. + + + VMOVQ + xmm1,r64/m64 + VEX.128.66.0F.W1 6E /r + + AVX + + Move quadword from r/m64 to xmm1. + + + MOVD + xmm,r/m32 + 66 0F 6E /r + + SSE2 + + Move doubleword from r/m32 to xmm. + + + MOVQ + xmm,r/m64 + 66 REX.W 0F 6E /r + + SSE2 + + Move quadword from r/m64 to xmm. + + + MOVD + r/m32,xmm + 66 0F 7E /r + + SSE2 + + Move doubleword from xmm register to r/m32. + + + MOVQ + r/m64,xmm + 66 REX.W 0F 7E /r + + SSE2 + + Move quadword from xmm register to r/m64. + + + VMOVD + r32/m32,xmm1 + VEX.128.66.0F.W0 7E /r + + AVX + + Move doubleword from xmm1 register to r/m32. + + + VMOVQ + r64/m64,xmm1 + VEX.128.66.0F.W1 7E /r + + AVX + + Move quadword from xmm1 register to r/m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVDDUP--Move One Double-FP and Duplicate. + + MOVDDUP + xmm1,xmm2/m64 + F2 0F 12 /r + + SSE3 + + Move one double-precision floating-point value from the lower 64-bit operand in xmm2/m64 to xmm1 and duplicate. + + + VMOVDDUP + xmm1,xmm2/m64 + VEX.128.F2.0F.WIG 12 /r + + AVX + + Move double-precision floating-point values from xmm2/mem and duplicate into xmm1. + + + VMOVDDUP + ymm1,ymm2/m256 + VEX.256.F2.0F.WIG 12 /r + + AVX + + Move even index double-precision floatingpoint values from ymm2/mem and duplicate each element into ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVDQA--Move Aligned Double Quadword. + + MOVDQA + xmm1,xmm2/m128 + 66 0F 6F /r + + SSE2 + + Move aligned double quadword from xmm2/m128 to xmm1. + + + MOVDQA + xmm2/m128,xmm1 + 66 0F 7F /r + + SSE2 + + Move aligned double quadword from xmm1 to xmm2/m128. + + + VMOVDQA + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 6F /r + + AVX + + Move aligned packed integer values from xmm2/mem to xmm1. + + + VMOVDQA + xmm2/m128,xmm1 + VEX.128.66.0F.WIG 7F /r + + AVX + + Move aligned packed integer values from xmm1 to xmm2/mem. + + + VMOVDQA + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 6F /r + + AVX + + Move aligned packed integer values from ymm2/mem to ymm1. + + + VMOVDQA + ymm2/m256,ymm1 + VEX.256.66.0F.WIG 7F /r + + AVX + + Move aligned packed integer values from ymm1 to ymm2/mem. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVDQU--Move Unaligned Double Quadword. + + MOVDQU + xmm1,xmm2/m128 + F3 0F 6F /r + + SSE2 + + Move unaligned double quadword from xmm2/m128 to xmm1. + + + MOVDQU + xmm2/m128,xmm1 + F3 0F 7F /r + + SSE2 + + Move unaligned double quadword from xmm1 to xmm2/m128. + + + VMOVDQU + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 6F /r + + AVX + + Move unaligned packed integer values from xmm2/mem to xmm1. + + + VMOVDQU + xmm2/m128,xmm1 + VEX.128.F3.0F.WIG 7F /r + + AVX + + Move unaligned packed integer values from xmm1 to xmm2/mem. + + + VMOVDQU + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 6F /r + + AVX + + Move unaligned packed integer values from ymm2/mem to ymm1. + + + VMOVDQU + ymm2/m256,ymm1 + VEX.256.F3.0F.WIG 7F /r + + AVX + + Move unaligned packed integer values from ymm1 to ymm2/mem. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVDQ2Q--Move Quadword from XMM to MMX Technology Register. + + MOVDQ2Q + mm,xmm + F2 0F D6 /r + Move low quadword from xmm to mmx register. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVHLPS--Move Packed Single-Precision Floating-Point Values High to Low. 
+ + MOVHLPS + xmm1,xmm2 + 0F 12 /r + + SSE + + Move two packed single-precision floatingpoint values from high quadword of xmm2 to low quadword of xmm1. + + + VMOVHLPS + xmm1,xmm2,xmm3 + VEX.NDS.128.0F.WIG 12 /r + + AVX + + Merge two packed single-precision floatingpoint values from high quadword of xmm3 and low quadword of xmm2. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MOVHPD--Move High Packed Double-Precision Floating-Point Value. + + MOVHPD + xmm,m64 + 66 0F 16 /r + + SSE2 + + Move double-precision floating-point value from m64 to high quadword of xmm. + + + MOVHPD + m64,xmm + 66 0F 17 /r + + SSE2 + + Move double-precision floating-point value from high quadword of xmm to m64. + + + VMOVHPD + xmm2,xmm1,m64 + VEX.NDS.128.66.0F.WIG 16 /r + + AVX + + Merge double-precision floating-point value from m64 and the low quadword of xmm1. + + + VMOVHPD + m64,xmm1 + VEX.128.66.0F.WIG 17/r + + AVX + + Move double-precision floating-point values from high quadword of xmm1 to m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MOVHPS--Move High Packed Single-Precision Floating-Point Values. + + MOVHPS + xmm,m64 + 0F 16 /r + + SSE + + Move two packed single-precision floatingpoint values from m64 to high quadword of xmm. + + + MOVHPS + m64,xmm + 0F 17 /r + + SSE + + Move two packed single-precision floatingpoint values from high quadword of xmm to m64. + + + VMOVHPS + xmm2,xmm1,m64 + VEX.NDS.128.0F.WIG 16 /r + + AVX + + Merge two packed single-precision floatingpoint values from m64 and the low quadword of xmm1. + + + VMOVHPS + m64,xmm1 + VEX.128.0F.WIG 17/r + + AVX + + Move two packed single-precision floatingpoint values from high quadword of xmm1to m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MOVLHPS--Move Packed Single-Precision Floating-Point Values Low to High. + + MOVLHPS + xmm1,xmm2 + 0F 16 /r + + SSE + + Move two packed single-precision floatingpoint values from low quadword of xmm2 to high quadword of xmm1. + + + VMOVLHPS + xmm1,xmm2,xmm3 + VEX.NDS.128.0F.WIG 16 /r + + AVX + + Merge two packed single-precision floatingpoint values from low quadword of xmm3 and low quadword of xmm2. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MOVLPD--Move Low Packed Double-Precision Floating-Point Value. + + MOVLPD + xmm,m64 + 66 0F 12 /r + + SSE2 + + Move double-precision floating-point value from m64 to low quadword of xmm register. + + + MOVLPD + m64,xmm + 66 0F 13 /r + + SSE2 + + Move double-precision floating-point nvalue from low quadword of xmm register to m64. + + + VMOVLPD + xmm2,xmm1,m64 + VEX.NDS.128.66.0F.WIG 12 /r + + AVX + + Merge double-precision floating-point value from m64 and the high quadword of xmm1. + + + VMOVLPD + m64,xmm1 + VEX.128.66.0F.WIG 13/r + + AVX + + Move double-precision floating-point values from low quadword of xmm1 to m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MOVLPS--Move Low Packed Single-Precision Floating-Point Values. + + MOVLPS + xmm,m64 + 0F 12 /r + + SSE + + Move two packed single-precision floatingpoint values from m64 to low quadword of xmm. 
+ + + MOVLPS + m64,xmm + 0F 13 /r + + SSE + + Move two packed single-precision floatingpoint values from low quadword of xmm to m64. + + + VMOVLPS + xmm2,xmm1,m64 + VEX.NDS.128.0F.WIG 12 /r + + AVX + + Merge two packed single-precision floatingpoint values from m64 and the high quadword of xmm1. + + + VMOVLPS + m64,xmm1 + VEX.128.0F.WIG 13/r + + AVX + + Move two packed single-precision floatingpoint values from low quadword of xmm1 to m64. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MOVMSKPD--Extract Packed Double-Precision Floating-Point Sign Mask. + + MOVMSKPD + reg,xmm + 66 0F 50 /r + + SSE2 + + Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros. + + + VMOVMSKPD + reg,xmm2 + VEX.128.66.0F.WIG 50 /r + + AVX + + Extract 2-bit sign mask from xmm2 and store in reg. The upper bits of r32 or r64 are zeroed. + + + VMOVMSKPD + reg,ymm2 + VEX.256.66.0F.WIG 50 /r + + AVX + + Extract 4-bit sign mask from ymm2 and store in reg. The upper bits of r32 or r64 are zeroed. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVMSKPS--Extract Packed Single-Precision Floating-Point Sign Mask. + + MOVMSKPS + reg,xmm + 0F 50 /r + + SSE + + Extract 4-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros. + + + VMOVMSKPS + reg,xmm2 + VEX.128.0F.WIG 50 /r + + AVX + + Extract 4-bit sign mask from xmm2 and store in reg. The upper bits of r32 or r64 are zeroed. + + + VMOVMSKPS + reg,ymm2 + VEX.256.0F.WIG 50 /r + + AVX + + Extract 8-bit sign mask from ymm2 and store in reg. The upper bits of r32 or r64 are zeroed. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVNTDQA--Load Double Quadword Non-Temporal Aligned Hint. + + MOVNTDQA + xmm1,m128 + 66 0F 38 2A /r + + SSE4_1 + + Move double quadword from m128 to xmm using non-temporal hint if WC memory type. + + + VMOVNTDQA + xmm1,m128 + VEX.128.66.0F38.WIG 2A /r + + AVX + + Move double quadword from m128 to xmm using non-temporal hint if WC memory type. + + + VMOVNTDQA + ymm1,m256 + VEX.256.66.0F38.WIG 2A /r + + AVX2 + + Move 256-bit data from m256 to ymm using non-temporal hint if WC memory type. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVNTDQ--Store Double Quadword Using Non-Temporal Hint. + + MOVNTDQ + m128,xmm + 66 0F E7 /r + + SSE2 + + Move double quadword from xmm to m128 using non-temporal hint. + + + VMOVNTDQ + m128,xmm1 + VEX.128.66.0F.WIG E7 /r + + AVX + + Move packed integer values in xmm1 to m128 using non-temporal hint. + + + VMOVNTDQ + m256,ymm1 + VEX.256.66.0F.WIG E7 /r + + AVX + + Move packed integer values in ymm1 to m256 using non-temporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVNTI--Store Doubleword Using Non-Temporal Hint. + + MOVNTI + m32,r32 + 0F C3 /r + Move doubleword from r32 to m32 using nontemporal hint. + + + MOVNTI + m64,r64 + REX.W + 0F C3 /r + Move quadword from r64 to m64 using nontemporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVNTPD--Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint. + + MOVNTPD + m128,xmm + 66 0F 2B /r + + SSE2 + + Move packed double-precision floating-point values from xmm to m128 using nontemporal hint. + + + VMOVNTPD + m128,xmm1 + VEX.128.66.0F.WIG 2B /r + + AVX + + Move packed double-precision values in xmm1 to m128 using non-temporal hint. 
+ + + VMOVNTPD + m256,ymm1 + VEX.256.66.0F.WIG 2B /r + + AVX + + Move packed double-precision values in ymm1 to m256 using non-temporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVNTPS--Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint. + + MOVNTPS + m128,xmm + 0F 2B /r + + SSE + + Move packed single-precision floating-point values from xmm to m128 using nontemporal hint. + + + VMOVNTPS + m128,xmm1 + VEX.128.0F.WIG 2B /r + + AVX + + Move packed single-precision values xmm1 to mem using non-temporal hint. + + + VMOVNTPS + m256,ymm1 + VEX.256.0F.WIG 2B /r + + AVX + + Move packed single-precision values ymm1 to mem using non-temporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVNTQ--Store of Quadword Using Non-Temporal Hint. + + MOVNTQ + m64,mm + 0F E7 /r + Move quadword from mm to m64 using nontemporal hint. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVQ--Move Quadword. + + MOVQ + mm,mm/m64 + 0F 6F /r + + MMX + + Move quadword from mm/m64 to mm. + + + MOVQ + mm/m64,mm + 0F 7F /r + + MMX + + Move quadword from mm to mm/m64. + + + MOVQ + xmm1,xmm2/m64 + F3 0F 7E /r + + SSE2 + + Move quadword from xmm2/mem64 to xmm1. + + + VMOVQ + xmm1,xmm2 + VEX.128.F3.0F.WIG 7E /r + + AVX + + Move quadword from xmm2 to xmm1. + + + VMOVQ + xmm1,m64 + VEX.128.F3.0F.WIG 7E /r + + AVX + + Load quadword from m64 to xmm1. + + + MOVQ + xmm2/m64,xmm1 + 66 0F D6 /r + + SSE2 + + Move quadword from xmm1 to xmm2/mem64. + + + VMOVQ + xmm1/m64,xmm2 + VEX.128.66.0F.WIG D6 /r + + AVX + + Move quadword from xmm2 register to xmm1/m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVQ2DQ--Move Quadword from MMX Technology to XMM Register. + + MOVQ2DQ + xmm,mm + F3 0F D6 /r + Move quadword from mmx to low quadword of xmm. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVS/MOVSB/MOVSW/MOVSD/MOVSQ--Move Data from String to String \. + + MOVS + m8,m8 + A4 + For legacy mode, Move byte from address DS:(E)SI to ES:(E)DI. For 64-bit mode move byte from address (R|E)SI to (R|E)DI. + + + MOVS + m16,m16 + A5 + For legacy mode, move word from address DS:(E)SI to ES:(E)DI. For 64-bit mode move word at address (R|E)SI to (R|E)DI. + + + MOVS + m32,m32 + A5 + For legacy mode, move dword from address DS:(E)SI to ES:(E)DI. For 64-bit mode move dword from address (R|E)SI to (R|E)DI. + + + MOVS + m64,m64 + REX.W + A5 + Move qword from address (R|E)SI to (R|E)DI. + + + MOVSB + void + A4 + For legacy mode, Move byte from address DS:(E)SI to ES:(E)DI. For 64-bit mode move byte from address (R|E)SI to (R|E)DI. + + + MOVSW + void + A5 + For legacy mode, move word from address DS:(E)SI to ES:(E)DI. For 64-bit mode move word at address (R|E)SI to (R|E)DI. + + + MOVSD + void + A5 + For legacy mode, move dword from address DS:(E)SI to ES:(E)DI. For 64-bit mode move dword from address (R|E)SI to (R|E)DI. + + + MOVSQ + void + REX.W + A5 + Move qword from address (R|E)SI to (R|E)DI. + + + NA + NA + NA + NA + + + + MOVSD--Move Scalar Double-Precision Floating-Point Value. + + MOVSD + xmm1,xmm2/m64 + F2 0F 10 /r + + SSE2 + + Move scalar double-precision floating-point value from xmm2/m64 to xmm1 register. + + + VMOVSD + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F2.0F.WIG 10 /r + + AVX + + Merge scalar double-precision floating-point value from xmm2 and xmm3 to xmm1 register. + + + VMOVSD + xmm1,m64 + VEX.LIG.F2.0F.WIG 10 /r + + AVX + + Load scalar double-precision floating-point value from m64 to xmm1 register. 
+ + + MOVSD + xmm2/m64,xmm1 + F2 0F 11 /r + + SSE2 + + Move scalar double-precision floating-point value from xmm1 register to xmm2/m64. + + + VMOVSD + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F2.0F.WIG 11 /r + + AVX + + Merge scalar double-precision floating-point value from xmm2 and xmm3 registers to xmm1. + + + VMOVSD + m64,xmm1 + VEX.LIG.F2.0F.WIG 11 /r + + AVX + + Move scalar double-precision floating-point value from xmm1 register to m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + VEX.vvvv(r) + ModRM:reg(r) + NA + + + + MOVSHDUP--Move Packed Single-FP High and Duplicate. + + MOVSHDUP + xmm1,xmm2/m128 + F3 0F 16 /r + + SSE3 + + Move two single-precision floating-point values from the higher 32-bit operand of each qword in xmm2/m128 to xmm1 and duplicate each 32-bit operand to the lower 32-bits of each qword. + + + VMOVSHDUP + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 16 /r + + AVX + + Move odd index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1. + + + VMOVSHDUP + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 16 /r + + AVX + + Move odd index single-precision floating-point values from ymm2/mem and duplicate each element into ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVSLDUP--Move Packed Single-FP Low and Duplicate. + + MOVSLDUP + xmm1,xmm2/m128 + F3 0F 12 /r + + SSE3 + + Move two single-precision floating-point values from the lower 32-bit operand of each qword in xmm2/m128 to xmm1 and duplicate each 32-bit operand to the higher 32-bits of each qword. + + + VMOVSLDUP + xmm1,xmm2/m128 + VEX.128.F3.0F.WIG 12 /r + + AVX + + Move even index single-precision floatingpoint values from xmm2/mem and duplicate each element into xmm1. + + + VMOVSLDUP + ymm1,ymm2/m256 + VEX.256.F3.0F.WIG 12 /r + + AVX + + Move even index single-precision floatingpoint values from ymm2/mem and duplicate each element into ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVSS--Move Scalar Single-Precision Floating-Point Values. + + MOVSS + xmm1,xmm2/m32 + F3 0F 10 /r + + SSE + + Move scalar single-precision floating-point value from xmm2/m32 to xmm1 register. + + + VMOVSS + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F3.0F.WIG 10 /r + + AVX + + Merge scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register. + + + VMOVSS + xmm1,m32 + VEX.LIG.F3.0F.WIG 10 /r + + AVX + + Load scalar single-precision floating-point value from m32 to xmm1 register. + + + MOVSS + xmm2/m32,xmm + F3 0F 11 /r + + SSE + + Move scalar single-precision floating-point value from xmm1 register to xmm2/m32. + + + VMOVSS + xmm1,xmm2,xmm3 + VEX.NDS.LIG.F3.0F.WIG 11 /r + + AVX + + Move scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register. + + + VMOVSS + m32,xmm1 + VEX.LIG.F3.0F.WIG 11 /r + + AVX + + Move scalar single-precision floating-point value from xmm1 register to m32. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + VEX.vvvv(r) + ModRM:reg(r) + NA + + + + MOVSX/MOVSXD--Move with Sign-Extension. + + MOVSX + r16,r/m8 + 0F BE /r + Move byte to word with sign-extension. + + + MOVSX + r32,r/m8 + 0F BE /r + Move byte to doubleword with signextension. + + + MOVSX + r64,r/m8* + REX + 0F BE /r + Move byte to quadword with sign-extension. 
+ + + MOVSX + r32,r/m16 + 0F BF /r + Move word to doubleword, with signextension. + + + MOVSX + r64,r/m16 + REX.W + 0F BF /r + Move word to quadword with sign-extension. + + + MOVSXD + r64,r/m32 + REX.W** + 63 /r + Move doubleword to quadword with signextension. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MOVUPD--Move Unaligned Packed Double-Precision Floating-Point Values. + + MOVUPD + xmm1,xmm2/m128 + 66 0F 10 /r + + SSE2 + + Move packed double-precision floating-point values from xmm2/m128 to xmm1. + + + VMOVUPD + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 10 /r + + AVX + + Move unaligned packed double-precision floating-point from xmm2/mem to xmm1. + + + VMOVUPD + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 10 /r + + AVX + + Move unaligned packed double-precision floating-point from ymm2/mem to ymm1. + + + MOVUPD + xmm2/m128,xmm + 66 0F 11 /r + + SSE2 + + Move packed double-precision floating-point values from xmm1 to xmm2/m128. + + + VMOVUPD + xmm2/m128,xmm1 + VEX.128.66.0F.WIG 11 /r + + AVX + + Move unaligned packed double-precision floating-point from xmm1 to xmm2/mem. + + + VMOVUPD + ymm2/m256,ymm1 + VEX.256.66.0F.WIG 11 /r + + AVX + + Move unaligned packed double-precision floating-point from ymm1 to ymm2/mem. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVUPS--Move Unaligned Packed Single-Precision Floating-Point Values. + + MOVUPS + xmm1,xmm2/m128 + 0F 10 /r + + SSE + + Move packed single-precision floating-point values from xmm2/m128 to xmm1. + + + VMOVUPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 10 /r + + AVX + + Move unaligned packed single-precision floating-point from xmm2/mem to xmm1. + + + VMOVUPS + ymm1,ymm2/m256 + VEX.256.0F.WIG 10 /r + + AVX + + Move unaligned packed single-precision floating-point from ymm2/mem to ymm1. + + + MOVUPS + xmm2/m128,xmm1 + 0F 11 /r + + SSE + + Move packed single-precision floating-point values from xmm1 to xmm2/m128. + + + VMOVUPS + xmm2/m128,xmm1 + VEX.128.0F.WIG 11 /r + + AVX + + Move unaligned packed single-precision floating-point from xmm1 to xmm2/mem. + + + VMOVUPS + ymm2/m256,ymm1 + VEX.256.0F.WIG 11 /r + + AVX + + Move unaligned packed single-precision floating-point from ymm1 to ymm2/mem. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + MOVZX--Move with Zero-Extend. + + MOVZX + r16,r/m8 + 0F B6 /r + Move byte to word with zero-extension. + + + MOVZX + r32,r/m8 + 0F B6 /r + Move byte to doubleword, zero-extension. + + + MOVZX + r64,r/m8* + REX.W + 0F B6 /r + Move byte to quadword, zero-extension. + + + MOVZX + r32,r/m16 + 0F B7 /r + Move word to doubleword, zero-extension. + + + MOVZX + r64,r/m16 + REX.W + 0F B7 /r + Move word to quadword, zero-extension. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + MPSADBW--Compute Multiple Packed Sums of Absolute Difference. + + MPSADBW + xmm1,xmm2/m128,imm8 + 66 0F 3A 42 /r ib + + SSE4_1 + + Sums absolute 8-bit integer difference of adjacent groups of 4 byte integers in xmm1 and xmm2/m128 and writes the results in xmm1. Starting offsets within xmm1 and xmm2/m128 are determined by imm8. + + + VMPSADBW + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A.WIG 42 /r ib + + AVX + + Sums absolute 8-bit integer difference of adjacent groups of 4 byte integers in xmm2 and xmm3/m128 and writes the results in xmm1. Starting offsets within xmm2 and xmm3/m128 are determined by imm8. 
+ + + VMPSADBW + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A.WIG 42 /r ib + + AVX2 + + Sums absolute 8-bit integer difference of adjacent groups of 4 byte integers in xmm2 and ymm3/m128 and writes the results in ymm1. Starting offsets within ymm2 and xmm3/m128 are determined by imm8. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + MUL--Unsigned Multiply. + + MUL + r/m8* + F6 /4 + Unsigned multiply (AX <-- AL * r/m8). + + + MUL + r/m8 + REX + F6 /4 + Unsigned multiply (AX <-- AL * r/m8). + + + MUL + r/m16 + F7 /4 + Unsigned multiply (DX:AX <-- AX * r/m16). + + + MUL + r/m32 + F7 /4 + Unsigned multiply (EDX:EAX <-- EAX * r/m32). + + + MUL + r/m64 + REX.W + F7 /4 + Unsigned multiply (RDX:RAX <-- RAX * r/m64). + + + ModRM:r/m(r) + NA + NA + NA + + + + MULPD--Multiply Packed Double-Precision Floating-Point Values. + + MULPD + xmm1,xmm2/m128 + 66 0F 59 /r + + SSE2 + + Multiply packed double-precision floating-point values in xmm2/m128 by xmm1. + + + VMULPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 59 /r + + AVX + + Multiply packed double-precision floating-point values from xmm3/mem to xmm2 and stores result in xmm1. + + + VMULPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 59 /r + + AVX + + Multiply packed double-precision floating-point values from ymm3/mem to ymm2 and stores result in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MULPS--Multiply Packed Single-Precision Floating-Point Values. + + MULPS + xmm1,xmm2/m128 + 0F 59 /r + + SSE + + Multiply packed single-precision floating-point values in xmm2/mem by xmm1. + + + VMULPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 59 /r + + AVX + + Multiply packed single-precision floating-point values from xmm3/mem to xmm2 and stores result in xmm1. + + + VMULPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 59 /r + + AVX + + Multiply packed single-precision floating-point values from ymm3/mem to ymm2 and stores result in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MULSD--Multiply Scalar Double-Precision Floating-Point Values. + + MULSD + xmm1,xmm2/m64 + F2 0F 59 /r + + SSE2 + + Multiply the low double-precision floatingpoint value in xmm2/mem64 by low doubleprecision floating-point value in xmm1. + + + VMULSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.LIG.F2.0F.WIG 59/r + + AVX + + Multiply the low double-precision floatingpoint value in xmm3/mem64 by low double precision floating-point value in xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MULSS--Multiply Scalar Single-Precision Floating-Point Values. + + MULSS + xmm1,xmm2/m32 + F3 0F 59 /r + + SSE + + Multiply the low single-precision floating-point value in xmm2/mem by the low singleprecision floating-point value in xmm1. + + + VMULSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.LIG.F3.0F.WIG 59 /r + + AVX + + Multiply the low single-precision floating-point value in xmm3/mem by the low singleprecision floating-point value in xmm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + MULX--Unsigned Multiply Without Affecting Flags. + + MULX + r32a,r32b,r/m32 + VEX.NDD.LZ.F2.0F38.W0 F6 /r + + BMI2 + + Unsigned multiply of r/m32 with EDX without affecting arithmetic flags. 
+ + + MULX + r64a,r64b,r/m64 + VEX.NDD.LZ.F2.0F38.W1 F6 /r + + BMI2 + + Unsigned multiply of r/m64 with RDX without affecting arithmetic flags. + + + ModRM:reg(w) + VEX.vvvv(w) + ModRM:r/m(r) + RDX/EDX is implied 64/32 bits source + + + + MWAIT--Monitor Wait. + + MWAIT + void + 0F 01 C9 + A hint that allow the processor to stop instruction execution and enter an implementation-dependent optimized state until occurrence of a class of events. + + + NA + NA + NA + NA + + + + NEG--Two's Complement Negation. + + NEG + r/m8 + F6 /3 + Two's complement negate r/m8. + + + NEG + r/m8* + REX + F6 /3 + Two's complement negate r/m8. + + + NEG + r/m16 + F7 /3 + Two's complement negate r/m16. + + + NEG + r/m32 + F7 /3 + Two's complement negate r/m32. + + + NEG + r/m64 + REX.W + F7 /3 + Two's complement negate r/m64. + + + ModRM:r/m(r,w) + NA + NA + NA + + + + NOP--No Operation. + + NOP + void + 90 + One byte no-operation instruction. + + + NOP + r/m16 + 0F 1F /0 + Multi-byte no-operation instruction. + + + NOP + r/m32 + 0F 1F /0 + Multi-byte no-operation instruction. + + + NA + NA + NA + NA + + + ModRM:r/m(r) + NA + NA + NA + + + + NOT--One's Complement Negation. + + NOT + r/m8 + F6 /2 + Reverse each bit of r/m8. + + + NOT + r/m8* + REX + F6 /2 + Reverse each bit of r/m8. + + + NOT + r/m16 + F7 /2 + Reverse each bit of r/m16. + + + NOT + r/m32 + F7 /2 + Reverse each bit of r/m32. + + + NOT + r/m64 + REX.W + F7 /2 + Reverse each bit of r/m64. + + + ModRM:r/m(r,w) + NA + NA + NA + + + + OR--Logical Inclusive OR. + + OR + AL,imm8 + 0C ib + AL OR imm8. + + + OR + AX,imm16 + 0D iw + AX OR imm16. + + + OR + EAX,imm32 + 0D id + EAX OR imm32. + + + OR + RAX,imm32 + REX.W + 0D id + RAX OR imm32 (sign-extended). + + + OR + r/m8,imm8 + 80 /1 ib + r/m8 OR imm8. + + + OR + r/m8*,imm8 + REX + 80 /1 ib + r/m8 OR imm8. + + + OR + r/m16,imm16 + 81 /1 iw + r/m16 OR imm16. + + + OR + r/m32,imm32 + 81 /1 id + r/m32 OR imm32. + + + OR + r/m64,imm32 + REX.W + 81 /1 id + r/m64 OR imm32 (sign-extended). + + + OR + r/m16,imm8 + 83 /1 ib + r/m16 OR imm8 (sign-extended). + + + OR + r/m32,imm8 + 83 /1 ib + r/m32 OR imm8 (sign-extended). + + + OR + r/m64,imm8 + REX.W + 83 /1 ib + r/m64 OR imm8 (sign-extended). + + + OR + r/m8,r8 + 08 /r + r/m8 OR r8. + + + OR + r/m8*,r8* + REX + 08 /r + r/m8 OR r8. + + + OR + r/m16,r16 + 09 /r + r/m16 OR r16. + + + OR + r/m32,r32 + 09 /r + r/m32 OR r32. + + + OR + r/m64,r64 + REX.W + 09 /r + r/m64 OR r64. + + + OR + r8,r/m8 + 0A /r + r8 OR r/m8. + + + OR + r8*,r/m8* + REX + 0A /r + r8 OR r/m8. + + + OR + r16,r/m16 + 0B /r + r16 OR r/m16. + + + OR + r32,r/m32 + 0B /r + r32 OR r/m32. + + + OR + r64,r/m64 + REX.W + 0B /r + r64 OR r/m64. + + + AL/AX/EAX/RAX + imm8(r)/16/32 + NA + NA + + + ModRM:r/m(r,w) + imm8(r)/16/32 + NA + NA + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + ORPD--Bitwise Logical OR of Double-Precision Floating-Point Values. + + ORPD + xmm1,xmm2/m128 + 66 0F 56 /r + + SSE2 + + Bitwise OR of xmm2/m128 and xmm1. + + + VORPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 56 /r + + AVX + + Return the bitwise logical OR of packed double-precision floating-point values in xmm2 and xmm3/mem. + + + VORPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 56 /r + + AVX + + Return the bitwise logical OR of packed double-precision floating-point values in ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + ORPS--Bitwise Logical OR of Single-Precision Floating-Point Values. 
+ + ORPS + xmm1,xmm2/m128 + 0F 56 /r + + SSE + + Bitwise OR of xmm1 and xmm2/m128. + + + VORPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 56 /r + + AVX + + Return the bitwise logical OR of packed singleprecision floating-point values in xmm2 and xmm3/mem. + + + VORPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 56 /r + + AVX + + Return the bitwise logical OR of packed singleprecision floating-point values in ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + OUT--Output to Port. + + OUT + imm8,AL + E6 ib + Output byte in AL to I/O port address imm8. + + + OUT + imm8,AX + E7 ib + Output word in AX to I/O port address imm8. + + + OUT + imm8,EAX + E7 ib + Output doubleword in EAX to I/O port address imm8. + + + OUT + DX,AL + EE + Output byte in AL to I/O port address in DX. + + + OUT + DX,AX + EF + Output word in AX to I/O port address in DX. + + + OUT + DX,EAX + EF + Output doubleword in EAX to I/O port address in DX. + + + imm8(r) + NA + NA + NA + + + NA + NA + NA + NA + + + + OUTS/OUTSB/OUTSW/OUTSD--Output String to Port. + + OUTS + DX,m8 + 6E + Output byte from memory location specified in DS:(E)SI or RSI to I/O port specified in DX**. + + + OUTS + DX,m16 + 6F + Output word from memory location specified in DS:(E)SI or RSI to I/O port specified in DX**. + + + OUTS + DX,m32 + 6F + Output doubleword from memory location specified in DS:(E)SI or RSI to I/O port specified in DX**. + + + OUTSB + void + 6E + Output byte from memory location specified in DS:(E)SI or RSI to I/O port specified in DX**. + + + OUTSW + void + 6F + Output word from memory location specified in DS:(E)SI or RSI to I/O port specified in DX**. + + + OUTSD + void + 6F + Output doubleword from memory location specified in DS:(E)SI or RSI to I/O port specified in DX**. + + + NA + NA + NA + NA + + + + PABSB/PABSW/PABSD--Packed Absolute Value. + + PABSB + mm1,mm2/m64 + 0F 38 1C /r1 + + SSSE3 + + Compute the absolute value of bytes in mm2/m64 and store UNSIGNED result in mm1. + + + PABSB + xmm1,xmm2/m128 + 66 0F 38 1C /r + + SSSE3 + + Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1. + + + PABSW + mm1,mm2/m64 + 0F 38 1D /r1 + + SSSE3 + + Compute the absolute value of 16-bit integers in mm2/m64 and store UNSIGNED result in mm1. + + + PABSW + xmm1,xmm2/m128 + 66 0F 38 1D /r + + SSSE3 + + Compute the absolute value of 16-bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + PABSD + mm1,mm2/m64 + 0F 38 1E /r1 + + SSSE3 + + Compute the absolute value of 32-bit integers in mm2/m64 and store UNSIGNED result in mm1. + + + PABSD + xmm1,xmm2/m128 + 66 0F 38 1E /r + + SSSE3 + + Compute the absolute value of 32-bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSB + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 1C /r + + AVX + + Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSW + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 1D /r + + AVX + + Compute the absolute value of 16bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSD + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 1E /r + + AVX + + Compute the absolute value of 32bit integers in xmm2/m128 and store UNSIGNED result in xmm1. + + + VPABSB + ymm1,ymm2/m256 + VEX.256.66.0F38.WIG 1C /r + + AVX2 + + Compute the absolute value of bytes in ymm2/m256 and store UNSIGNED result in ymm1. 
+ + + VPABSW + ymm1,ymm2/m256 + VEX.256.66.0F38.WIG 1D /r + + AVX2 + + Compute the absolute value of 16-bit integers in ymm2/m256 and store UNSIGNED result in ymm1. + + + VPABSD + ymm1,ymm2/m256 + VEX.256.66.0F38.WIG 1E /r + + AVX2 + + Compute the absolute value of 32-bit integers in ymm2/m256 and store UNSIGNED result in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PACKSSWB/PACKSSDW--Pack with Signed Saturation. + + PACKSSWB + mm1,mm2/m64 + 0F 63 /r1 + + MMX + + Converts 4 packed signed word integers from mm1 and from mm2/m64 into 8 packed signed byte integers in mm1 using signed saturation. + + + PACKSSWB + xmm1,xmm2/m128 + 66 0F 63 /r + + SSE2 + + Converts 8 packed signed word integers from xmm1 and from xxm2/m128 into 16 packed signed byte integers in xxm1 using signed saturation. + + + PACKSSDW + mm1,mm2/m64 + 0F 6B /r1 + + MMX + + Converts 2 packed signed doubleword integers from mm1 and from mm2/m64 into 4 packed signed word integers in mm1 using signed saturation. + + + PACKSSDW + xmm1,xmm2/m128 + 66 0F 6B /r + + SSE2 + + Converts 4 packed signed doubleword integers from xmm1 and from xxm2/m128 into 8 packed signed word integers in xxm1 using signed saturation. + + + VPACKSSWB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 63 /r + + AVX + + Converts 8 packed signed word integers from xmm2 and from xmm3/m128 into 16 packed signed byte integers in xmm1 using signed saturation. + + + VPACKSSDW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 6B /r + + AVX + + Converts 4 packed signed doubleword integers from xmm2 and from xmm3/m128 into 8 packed signed word integers in xmm1 using signed saturation. + + + VPACKSSWB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 63 /r + + AVX2 + + Converts 16 packed signed word integers from ymm2 and from ymm3/m256 into 32 packed signed byte integers in ymm1 using signed saturation. + + + VPACKSSDW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 6B /r + + AVX2 + + Converts 8 packed signed doubleword integers from ymm2 and from ymm3/m256 into 16 packed signed word integers in ymm1using signed saturation. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PACKUSDW--Pack with Unsigned Saturation. + + PACKUSDW + xmm1,xmm2/m128 + 66 0F 38 2B /r + + SSE4_1 + + Convert 4 packed signed doubleword integers from xmm1 and 4 packed signed doubleword integers from xmm2/m128 into 8 packed unsigned word integers in xmm1 using unsigned saturation. + + + VPACKUSDW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 2B /r + + AVX + + Convert 4 packed signed doubleword integers from xmm2 and 4 packed signed doubleword integers from xmm3/m128 into 8 packed unsigned word integers in xmm1 using unsigned saturation. + + + VPACKUSDW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 2B /r + + AVX2 + + Convert 8 packed signed doubleword integers from ymm2 and 8 packed signed doubleword integers from ymm3/m128 into 16 packed unsigned word integers in ymm1 using unsigned saturation. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PACKUSWB--Pack with Unsigned Saturation. + + PACKUSWB + mm,mm/m64 + 0F 67 /r1 + + MMX + + Converts 4 signed word integers from mm and 4 signed word integers from mm/m64 into 8 unsigned byte integers in mm using unsigned saturation. + + + PACKUSWB + xmm1,xmm2/m128 + 66 0F 67 /r + + SSE2 + + Converts 8 signed word integers from xmm1 and 8 signed word integers from xmm2/m128 into 16 unsigned byte integers in xmm1 using unsigned saturation. 
+ + + VPACKUSWB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 67 /r + + AVX + + Converts 8 signed word integers from xmm2 and 8 signed word integers from xmm3/m128 into 16 unsigned byte integers in xmm1 using unsigned saturation. + + + VPACKUSWB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 67 /r + + AVX2 + + Converts 16 signed word integers from ymm2 and 16signed word integers from ymm3/m256 into 32 unsigned byte integers in ymm1 using unsigned saturation. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PADDB/PADDW/PADDD--Add Packed Integers. + + PADDB + mm,mm/m64 + 0F FC /r1 + + MMX + + Add packed byte integers from mm/m64 and mm. + + + PADDB + xmm1,xmm2/m128 + 66 0F FC /r + + SSE2 + + Add packed byte integers from xmm2/m128 and xmm1. + + + PADDW + mm,mm/m64 + 0F FD /r1 + + MMX + + Add packed word integers from mm/m64 and mm. + + + PADDW + xmm1,xmm2/m128 + 66 0F FD /r + + SSE2 + + Add packed word integers from xmm2/m128 and xmm1. + + + PADDD + mm,mm/m64 + 0F FE /r1 + + MMX + + Add packed doubleword integers from mm/m64 and mm. + + + PADDD + xmm1,xmm2/m128 + 66 0F FE /r + + SSE2 + + Add packed doubleword integers from xmm2/m128 and xmm1. + + + VPADDB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FC /r + + AVX + + Add packed byte integers from xmm3/m128 and xmm2. + + + VPADDW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FD /r + + AVX + + Add packed word integers from xmm3/m128 and xmm2. + + + VPADDD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FE /r + + AVX + + Add packed doubleword integers from xmm3/m128 and xmm2. + + + VPADDB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FC /r + + AVX2 + + Add packed byte integers from ymm2, and ymm3/m256 and store in ymm1. + + + VPADDW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FD /r + + AVX2 + + Add packed word integers from ymm2, ymm3/m256 and store in ymm1. + + + VPADDD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FE /r + + AVX2 + + Add packed doubleword integers from ymm2, ymm3/m256 and store in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PADDQ--Add Packed Quadword Integers. + + PADDQ + mm1,mm2/m64 + 0F D4 /r1 + + SSE2 + + Add quadword integer mm2/m64 to mm1. + + + PADDQ + xmm1,xmm2/m128 + 66 0F D4 /r + + SSE2 + + Add packed quadword integers xmm2/m128 to xmm1. + + + VPADDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D4 /r + + AVX + + Add packed quadword integers xmm3/m128 and xmm2. + + + VPADDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG D4 /r + + AVX2 + + Add packed quadword integers from ymm2, ymm3/m256 and store in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PADDSB/PADDSW--Add Packed Signed Integers with Signed Saturation. + + PADDSB + mm,mm/m64 + 0F EC /r1 + + MMX + + Add packed signed byte integers from mm/m64 and mm and saturate the results. + + + PADDSB + xmm1,xmm2/m128 + 66 0F EC /r + + SSE2 + + Add packed signed byte integers from xmm2/m128 and xmm1 saturate the results. + + + PADDSW + mm,mm/m64 + 0F ED /r1 + + MMX + + Add packed signed word integers from mm/m64 and mm and saturate the results. + + + PADDSW + xmm1,xmm2/m128 + 66 0F ED /r + + SSE2 + + Add packed signed word integers from xmm2/m128 and xmm1 and saturate the results. + + + VPADDSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EC /r + + AVX + + Add packed signed byte integers from xmm3/m128 and xmm2 saturate the results. 
+ + + VPADDSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG ED /r + + AVX + + Add packed signed word integers from xmm3/m128 and xmm2 and saturate the results. + + + VPADDSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EC /r + + AVX2 + + Add packed signed byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + VPADDSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG ED /r + + AVX2 + + Add packed signed word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PADDUSB/PADDUSW--Add Packed Unsigned Integers with Unsigned Saturation. + + PADDUSB + mm,mm/m64 + 0F DC /r1 + + MMX + + Add packed unsigned byte integers from mm/m64 and mm and saturate the results. + + + PADDUSB + xmm1,xmm2/m128 + 66 0F DC /r + + SSE2 + + Add packed unsigned byte integers from xmm2/m128 and xmm1 saturate the results. + + + PADDUSW + mm,mm/m64 + 0F DD /r1 + + MMX + + Add packed unsigned word integers from mm/m64 and mm and saturate the results. + + + PADDUSW + xmm1,xmm2/m128 + 66 0F DD /r + + SSE2 + + Add packed unsigned word integers from xmm2/m128 to xmm1 and saturate the results. + + + VPADDUSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.660F.WIG DC /r + + AVX + + Add packed unsigned byte integers from xmm3/m128 to xmm2 and saturate the results. + + + VPADDUSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG DD /r + + AVX + + Add packed unsigned word integers from xmm3/m128 to xmm2 and saturate the results. + + + VPADDUSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG DC /r + + AVX2 + + Add packed unsigned byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + VPADDUSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG DD /r + + AVX2 + + Add packed unsigned word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PALIGNR--Packed Align Right. + + PALIGNR + mm1,mm2/m64,imm8 + 0F 3A 0F /r ib1 + + SSSE3 + + Concatenate destination and source operands, extract byte-aligned result shifted to the right by constant value in imm8 into mm1. + + + PALIGNR + xmm1,xmm2/m128,imm8 + 66 0F 3A 0F /r ib + + SSSE3 + + Concatenate destination and source operands, extract byte-aligned result shifted to the right by constant value in imm8 into xmm1. + + + VPALIGNR + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A.WIG 0F /r ib + + AVX + + Concatenate xmm2 and xmm3/m128, extract byte aligned result shifted to the right by constant value in imm8 and result is stored in xmm1. + + + VPALIGNR + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A.WIG 0F /r ib + + AVX2 + + Concatenate pairs of 16 bytes in ymm2 and ymm3/m256 into 32-byte intermediate result, extract byte-aligned, 16-byte result shifted to the right by constant values in imm8 from each intermediate result, and two 16-byte results are stored in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + PAND--Logical AND. + + PAND + mm,mm/m64 + 0F DB /r1 + + MMX + + Bitwise AND mm/m64 and mm. + + + PAND + xmm1,xmm2/m128 + 66 0F DB /r + + SSE2 + + Bitwise AND of xmm2/m128 and xmm1. + + + VPAND + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG DB /r + + AVX + + Bitwise AND of xmm3/m128 and xmm. + + + VPAND + ymm1,ymm2,ymm3/.m256 + VEX.NDS.256.66.0F.WIG DB /r + + AVX2 + + Bitwise AND of ymm2, and ymm3/m256 and store result in ymm1. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PANDN--Logical AND NOT. + + PANDN + mm,mm/m64 + 0F DF /r1 + + MMX + + Bitwise AND NOT of mm/m64 and mm. + + + PANDN + xmm1,xmm2/m128 + 66 0F DF /r + + SSE2 + + Bitwise AND NOT of xmm2/m128 and xmm1. + + + VPANDN + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG DF /r + + AVX + + Bitwise AND NOT of xmm3/m128 and xmm2. + + + VPANDN + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG DF /r + + AVX2 + + Bitwise AND NOT of ymm2, and ymm3/m256 and store result in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PAUSE--Spin Loop Hint. + + PAUSE + void + F3 90 + Gives hint to processor that improves performance of spin-wait loops. + + + NA + NA + NA + NA + + + + PAVGB/PAVGW--Average Packed Integers. + + PAVGB + mm1,mm2/m64 + 0F E0 /r1 + + SSE + + Average packed unsigned byte integers from mm2/m64 and mm1 with rounding. + + + PAVGB + xmm1,xmm2/m128 + 66 0F E0,/r + + SSE2 + + Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding. + + + PAVGW + mm1,mm2/m64 + 0F E3 /r1 + + SSE + + Average packed unsigned word integers from mm2/m64 and mm1 with rounding. + + + PAVGW + xmm1,xmm2/m128 + 66 0F E3 /r + + SSE2 + + Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding. + + + VPAVGB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E0 /r + + AVX + + Average packed unsigned byte integers from xmm3/m128 and xmm2 with rounding. + + + VPAVGW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E3 /r + + AVX + + Average packed unsigned word integers from xmm3/m128 and xmm2 with rounding. + + + VPAVGB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG E0 /r + + AVX2 + + Average packed unsigned byte integers from ymm2, and ymm3/m256 with rounding and store to ymm1. + + + VPAVGW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG E3 /r + + AVX2 + + Average packed unsigned word integers from ymm2, ymm3/m256 with rounding to ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PBLENDVB--Variable Blend Packed Bytes. + + PBLENDVB + xmm1,xmm2/m128,<XMM0> + 66 0F 38 10 /r + + SSE4_1 + + Select byte values from xmm1 and xmm2/m128 from mask specified in the high values into xmm1. + + + VPBLENDVB + xmm1,xmm2,xmm3/m128,xmm4 + VEX.NDS.128.66.0F3A.W0 4C /r /is4 + + AVX + + Select byte values from xmm2 and xmm3/m128 using mask bits in the specified mask register, xmm4, and store the values into xmm1. + + + VPBLENDVB + ymm1,ymm2,ymm3/m256,ymm4 + VEX.NDS.256.66.0F3A.W0 4C /r /is4 + + AVX2 + + Select byte values from ymm2 and ymm3/m256 from mask specified in the high values into ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + implicit XMM0 + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r)[7:4] + + + + PBLENDW--Blend Packed Words. + + PBLENDW + xmm1,xmm2/m128,imm8 + 66 0F 3A 0E /r ib + + SSE4_1 + + Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1. + + + VPBLENDW + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A.WIG 0E /r ib + + AVX + + Select words from xmm2 and xmm3/m128 from mask specified in imm8 and store the values into xmm1. + + + VPBLENDW + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A.WIG 0E /r ib + + AVX2 + + Select words from ymm2 and ymm3/m256 from mask specified in imm8 and store the values into ymm1. 
+ + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + PCLMULQDQ--Carry-Less Multiplication Quadword. + + PCLMULQDQ + xmm1,xmm2/m128,imm8 + 66 0F 3A 44 /r ib + + PCLMULQDQ + + Carry-less multiplication of one quadword of xmm1 by one quadword of xmm2/m128, stores the 128-bit result in xmm1. The immediate is used to determine which quadwords of xmm1 and xmm2/m128 should be used. + + + VPCLMULQDQ + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A.WIG 44 /r ib + + PCLMULQDQ + AVX + + Carry-less multiplication of one quadword of xmm2 by one quadword of xmm3/m128, stores the 128-bit result in xmm1. The immediate is used to determine which quadwords of xmm2 and xmm3/m128 should be used. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + PCMPEQB/PCMPEQW/PCMPEQD--Compare Packed Data for Equal. + + PCMPEQB + mm,mm/m64 + 0F 74 /r1 + + MMX + + Compare packed bytes in mm/m64 and mm for equality. + + + PCMPEQB + xmm1,xmm2/m128 + 66 0F 74 /r + + SSE2 + + Compare packed bytes in xmm2/m128 and xmm1 for equality. + + + PCMPEQW + mm,mm/m64 + 0F 75 /r1 + + MMX + + Compare packed words in mm/m64 and mm for equality. + + + PCMPEQW + xmm1,xmm2/m128 + 66 0F 75 /r + + SSE2 + + Compare packed words in xmm2/m128 and xmm1 for equality. + + + PCMPEQD + mm,mm/m64 + 0F 76 /r1 + + MMX + + Compare packed doublewords in mm/m64 and mm for equality. + + + PCMPEQD + xmm1,xmm2/m128 + 66 0F 76 /r + + SSE2 + + Compare packed doublewords in xmm2/m128 and xmm1 for equality. + + + VPCMPEQB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 74 /r + + AVX + + Compare packed bytes in xmm3/m128 and xmm2 for equality. + + + VPCMPEQW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 75 /r + + AVX + + Compare packed words in xmm3/m128 and xmm2 for equality. + + + VPCMPEQD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 76 /r + + AVX + + Compare packed doublewords in xmm3/m128 and xmm2 for equality. + + + VPCMPEQB + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F.WIG 74 /r + + AVX2 + + Compare packed bytes in ymm3/m256 and ymm2 for equality. + + + VPCMPEQW + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F.WIG 75 /r + + AVX2 + + Compare packed words in ymm3/m256 and ymm2 for equality. + + + VPCMPEQD + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F.WIG 76 /r + + AVX2 + + Compare packed doublewords in ymm3/m256 and ymm2 for equality. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PCMPEQQ--Compare Packed Qword Data for Equal. + + PCMPEQQ + xmm1,xmm2/m128 + 66 0F 38 29 /r + + SSE4_1 + + Compare packed qwords in xmm2/m128 and xmm1 for equality. + + + VPCMPEQQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 29 /r + + AVX + + Compare packed quadwords in xmm3/m128 and xmm2 for equality. + + + VPCMPEQQ + ymm1,ymm2,ymm3 /m256 + VEX.NDS.256.66.0F38.WIG 29 /r + + AVX2 + + Compare packed quadwords in ymm3/m256 and ymm2 for equality. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PCMPESTRI--Packed Compare Explicit Length Strings, Return Index. + + PCMPESTRI + xmm1,xmm2/m128,imm8 + 66 0F 3A 61 /r imm8 + + SSE4_2 + + Perform a packed comparison of string data with explicit lengths, generating an index, and storing the result in ECX. + + + VPCMPESTRI + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.WIG 61 /r ib + + AVX + + Perform a packed comparison of string data with explicit lengths, generating an index, and storing the result in ECX. 
+ + + ModRM:reg(r) + ModRM:r/m(r) + imm8(r) + NA + + + + PCMPESTRM--Packed Compare Explicit Length Strings, Return Mask. + + PCMPESTRM + xmm1,xmm2/m128,imm8 + 66 0F 3A 60 /r imm8 + + SSE4_2 + + Perform a packed comparison of string data with explicit lengths, generating a mask, and storing the result in XMM0. + + + VPCMPESTRM + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.WIG 60 /r ib + + AVX + + Perform a packed comparison of string data with explicit lengths, generating a mask, and storing the result in XMM0. + + + ModRM:reg(r) + ModRM:r/m(r) + imm8(r) + NA + + + + PCMPGTB/PCMPGTW/PCMPGTD--Compare Packed Signed Integers for Greater Than. + + PCMPGTB + mm,mm/m64 + 0F 64 /r1 + + MMX + + Compare packed signed byte integers in mm and mm/m64 for greater than. + + + PCMPGTB + xmm1,xmm2/m128 + 66 0F 64 /r + + SSE2 + + Compare packed signed byte integers in xmm1 and xmm2/m128 for greater than. + + + PCMPGTW + mm,mm/m64 + 0F 65 /r1 + + MMX + + Compare packed signed word integers in mm and mm/m64 for greater than. + + + PCMPGTW + xmm1,xmm2/m128 + 66 0F 65 /r + + SSE2 + + Compare packed signed word integers in xmm1 and xmm2/m128 for greater than. + + + PCMPGTD + mm,mm/m64 + 0F 66 /r1 + + MMX + + Compare packed signed doubleword integers in mm and mm/m64 for greater than. + + + PCMPGTD + xmm1,xmm2/m128 + 66 0F 66 /r + + SSE2 + + Compare packed signed doubleword integers in xmm1 and xmm2/m128 for greater than. + + + VPCMPGTB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 64 /r + + AVX + + Compare packed signed byte integers in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 65 /r + + AVX + + Compare packed signed word integers in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 66 /r + + AVX + + Compare packed signed doubleword integers in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 64 /r + + AVX2 + + Compare packed signed byte integers in ymm2 and ymm3/m256 for greater than. + + + VPCMPGTW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 65 /r + + AVX2 + + Compare packed signed word integers in ymm2 and ymm3/m256 for greater than. + + + VPCMPGTD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 66 /r + + AVX2 + + Compare packed signed doubleword integers in ymm2 and ymm3/m256 for greater than. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PCMPGTQ--Compare Packed Data for Greater Than. + + PCMPGTQ + xmm1,xmm2/m128 + 66 0F 38 37 /r + + SSE4_2 + + Compare packed signed qwords in xmm2/m128 and xmm1 for greater than. + + + VPCMPGTQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 37 /r + + AVX + + Compare packed signed qwords in xmm2 and xmm3/m128 for greater than. + + + VPCMPGTQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 37 /r + + AVX2 + + Compare packed signed qwords in ymm2 and ymm3/m256 for greater than. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PCMPISTRI--Packed Compare Implicit Length Strings, Return Index. + + PCMPISTRI + xmm1,xmm2/m128,imm8 + 66 0F 3A 63 /r imm8 + + SSE4_2 + + Perform a packed comparison of string data with implicit lengths, generating an index, and storing the result in ECX. + + + VPCMPISTRI + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.WIG 63 /r ib + + AVX + + Perform a packed comparison of string data with implicit lengths, generating an index, and storing the result in ECX. 
+ + + ModRM:reg(r) + ModRM:r/m(r) + imm8(r) + NA + + + + PCMPISTRM--Packed Compare Implicit Length Strings, Return Mask. + + PCMPISTRM + xmm1,xmm2/m128,imm8 + 66 0F 3A 62 /r imm8 + + SSE4_2 + + Perform a packed comparison of string data with implicit lengths, generating a mask, and storing the result in XMM0. + + + VPCMPISTRM + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.WIG 62 /r ib + + AVX + + Perform a packed comparison of string data with implicit lengths, generating a Mask, and storing the result in XMM0. + + + ModRM:reg(r) + ModRM:r/m(r) + imm8(r) + NA + + + + PDEP--Parallel Bits Deposit. + + PDEP + r32a,r32b,r/m32 + VEX.NDS.LZ.F2.0F38.W0 F5 /r + + BMI2 + + Parallel deposit of bits from r32b using mask in r/m32, result is written to r32a. + + + PDEP + r64a,r64b,r/m64 + VEX.NDS.LZ.F2.0F38.W1 F5 /r + + BMI2 + + Parallel deposit of bits from r64b using mask in r/m64, result is written to r64a. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PEXT--Parallel Bits Extract. + + PEXT + r32a,r32b,r/m32 + VEX.NDS.LZ.F3.0F38.W0 F5 /r + + BMI2 + + Parallel extract of bits from r32b using mask in r/m32, result is written to r32a. + + + PEXT + r64a,r64b,r/m64 + VEX.NDS.LZ.F3.0F38.W1 F5 /r + + BMI2 + + Parallel extract of bits from r64b using mask in r/m64, result is written to r64a. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PEXTRB/PEXTRD/PEXTRQ--Extract Byte/Dword/Qword. + + PEXTRB + reg/m8,xmm2,imm8 + 66 0F 3A 14 /r ib + + SSE4_1 + + Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r32 or r64 are zeroed. + + + PEXTRD + r/m32,xmm2,imm8 + 66 0F 3A 16 /r ib + + SSE4_1 + + Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r/m32. + + + PEXTRQ + r/m64,xmm2,imm8 + 66 REX.W 0F 3A 16 /r ib + + SSE4_1 + + Extract a qword integer value from xmm2 at the source qword offset specified by imm8 into r/m64. + + + VPEXTRB + reg/m8,xmm2,imm8 + VEX.128.66.0F3A.W0 14 /r ib + + AVX + + Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRD + r32/m32,xmm2,imm8 + VEX.128.66.0F3A.W0 16 /r ib + + AVX + + Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r32/m32. + + + VPEXTRQ + r64/m64,xmm2,imm8 + VEX.128.66.0F3A.W1 16 /r ib + + AVX + + Extract a qword integer value from xmm2 at the source dword offset specified by imm8 into r64/m64. + + + ModRM:r/m(w) + ModRM:reg(r) + imm8(r) + NA + + + + PEXTRW--Extract Word. + + PEXTRW + reg,mm,imm8 + 0F C5 /r ib 1 + + SSE + + Extract the word specified by imm8 from mm and move it to reg, bits 15-0. The upper bits of r32 or r64 is zeroed. + + + PEXTRW + reg,xmm,imm8 + 66 0F C5 /r ib + + SSE2 + + Extract the word specified by imm8 from xmm and move it to reg, bits 15-0. The upper bits of r32 or r64 is zeroed. + + + PEXTRW + reg/m16,xmm,imm8 + 66 0F 3A 15 /r ib + + SSE4_1 + + Extract the word specified by imm8 from xmm and copy it to lowest 16 bits of reg or m16. Zero-extend the result in the destination, r32 or r64. + + + VPEXTRW + reg,xmm1,imm8 + VEX.128.66.0F.W0 C5 /r ib + + AVX + + Extract the word specified by imm8 from xmm1 and move it to reg, bits 15:0. Zeroextend the result. The upper bits of r64/r32 is filled with zeros. + + + VPEXTRW + reg/m16,xmm2,imm8 + VEX.128.66.0F3A.W0 15 /r ib + + AVX + + Extract a word integer value from xmm2 at the source word offset specified by imm8 into reg or m16. 
The upper bits of r64/r32 is filled with zeros. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + imm8(r) + NA + + + + PHADDW/PHADDD--Packed Horizontal Add. + + PHADDW + mm1,mm2/m64 + 0F 38 01 /r1 + + SSSE3 + + Add 16-bit integers horizontally, pack to mm1. + + + PHADDW + xmm1,xmm2/m128 + 66 0F 38 01 /r + + SSSE3 + + Add 16-bit integers horizontally, pack to xmm1. + + + PHADDD + mm1,mm2/m64 + 0F 38 02 /r + + SSSE3 + + Add 32-bit integers horizontally, pack to mm1. + + + PHADDD + xmm1,xmm2/m128 + 66 0F 38 02 /r + + SSSE3 + + Add 32-bit integers horizontally, pack to xmm1. + + + VPHADDW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 01 /r + + AVX + + Add 16-bit integers horizontally, pack to xmm1. + + + VPHADDD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 02 /r + + AVX + + Add 32-bit integers horizontally, pack to xmm1. + + + VPHADDW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 01 /r + + AVX2 + + Add 16-bit signed integers horizontally, pack to ymm1. + + + VPHADDD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 02 /r + + AVX2 + + Add 32-bit signed integers horizontally, pack to ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PHADDSW--Packed Horizontal Add and Saturate. + + PHADDSW + mm1,mm2/m64 + 0F 38 03 /r1 + + SSSE3 + + Add 16-bit signed integers horizontally, pack saturated integers to mm1. + + + PHADDSW + xmm1,xmm2/m128 + 66 0F 38 03 /r + + SSSE3 + + Add 16-bit signed integers horizontally, pack saturated integers to xmm1. + + + VPHADDSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 03 /r + + AVX + + Add 16-bit signed integers horizontally, pack saturated integers to xmm1. + + + VPHADDSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 03 /r + + AVX2 + + Add 16-bit signed integers horizontally, pack saturated integers to ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PHMINPOSUW--Packed Horizontal Word Minimum. + + PHMINPOSUW + xmm1,xmm2/m128 + 66 0F 38 41 /r + + SSE4_1 + + Find the minimum unsigned word in xmm2/m128 and place its value in the low word of xmm1 and its index in the secondlowest word of xmm1. + + + VPHMINPOSUW + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 41 /r + + AVX + + Find the minimum unsigned word in xmm2/m128 and place its value in the low word of xmm1 and its index in the secondlowest word of xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PHSUBW/PHSUBD--Packed Horizontal Subtract. + + PHSUBW + mm1,mm2/m64 + 0F 38 05 /r1 + + SSSE3 + + Subtract 16-bit signed integers horizontally, pack to mm1. + + + PHSUBW + xmm1,xmm2/m128 + 66 0F 38 05 /r + + SSSE3 + + Subtract 16-bit signed integers horizontally, pack to xmm1. + + + PHSUBD + mm1,mm2/m64 + 0F 38 06 /r + + SSSE3 + + Subtract 32-bit signed integers horizontally, pack to mm1. + + + PHSUBD + xmm1,xmm2/m128 + 66 0F 38 06 /r + + SSSE3 + + Subtract 32-bit signed integers horizontally, pack to xmm1. + + + VPHSUBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 05 /r + + AVX + + Subtract 16-bit signed integers horizontally, pack to xmm1. + + + VPHSUBD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 06 /r + + AVX + + Subtract 32-bit signed integers horizontally, pack to xmm1. + + + VPHSUBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 05 /r + + AVX2 + + Subtract 16-bit signed integers horizontally, pack to ymm1. 
+ + + VPHSUBD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 06 /r + + AVX2 + + Subtract 32-bit signed integers horizontally, pack to ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PHSUBSW--Packed Horizontal Subtract and Saturate. + + PHSUBSW + mm1,mm2/m64 + 0F 38 07 /r1 + + SSSE3 + + Subtract 16-bit signed integer horizontally, pack saturated integers to mm1. + + + PHSUBSW + xmm1,xmm2/m128 + 66 0F 38 07 /r + + SSSE3 + + Subtract 16-bit signed integer horizontally, pack saturated integers to xmm1. + + + VPHSUBSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 07 /r + + AVX + + Subtract 16-bit signed integer horizontally, pack saturated integers to xmm1. + + + VPHSUBSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 07 /r + + AVX2 + + Subtract 16-bit signed integer horizontally, pack saturated integers to ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PINSRB/PINSRD/PINSRQ--Insert Byte/Dword/Qword. + + PINSRB + xmm1,r32/m8,imm8 + 66 0F 3A 20 /r ib + + SSE4_1 + + Insert a byte integer value from r32/m8 into xmm1 at the destination element in xmm1 specified by imm8. + + + PINSRD + xmm1,r/m32,imm8 + 66 0F 3A 22 /r ib + + SSE4_1 + + Insert a dword integer value from r/m32 into the xmm1 at the destination element specified by imm8. + + + PINSRQ + xmm1,r/m64,imm8 + 66 REX.W 0F 3A 22 /r ib + + SSE4_1 + + Insert a qword integer value from r/m64 into the xmm1 at the destination element specified by imm8. + + + VPINSRB + xmm1,xmm2,r32/m8,imm8 + VEX.NDS.128.66.0F3A.W0 20 /r ib + + AVX + + Merge a byte integer value from r32/m8 and rest from xmm2 into xmm1 at the byte offset in imm8. + + + VPINSRD + xmm1,xmm2,r/m32,imm8 + VEX.NDS.128.66.0F3A.W0 22 /r ib + + AVX + + Insert a dword integer value from r32/m32 and rest from xmm2 into xmm1 at the dword offset in imm8. + + + VPINSRQ + xmm1,xmm2,r/m64,imm8 + VEX.NDS.128.66.0F3A.W1 22 /r ib + + AVX + + Insert a qword integer value from r64/m64 and rest from xmm2 into xmm1 at the qword offset in imm8. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + PINSRW--Insert Word. + + PINSRW + mm,r32/m16,imm8 + 0F C4 /r ib 1 + + SSE + + Insert the low word from r32 or from m16 into mm at the word position specified by imm8. + + + PINSRW + xmm,r32/m16,imm8 + 66 0F C4 /r ib + + SSE2 + + Move the low word of r32 or from m16 into xmm at the word position specified by imm8. + + + VPINSRW + xmm1,xmm2,r32/m16,imm8 + VEX.NDS.128.66.0F.W0 C4 /r ib + + AVX + + Insert a word integer value from r32/m16 and rest from xmm2 into xmm1 at the word offset in imm8. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + PMADDUBSW--Multiply and Add Packed Signed and Unsigned Bytes. + + PMADDUBSW + mm1,mm2/m64 + 0F 38 04 /r1 + + SSSE3 + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to mm1. + + + PMADDUBSW + xmm1,xmm2/m128 + 66 0F 38 04 /r + + SSSE3 + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1. + + + VPMADDUBSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 04 /r + + AVX + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1. 
+ + + VPMADDUBSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 04 /r + + AVX2 + + Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMADDWD--Multiply and Add Packed Integers. + + PMADDWD + mm,mm/m64 + 0F F5 /r1 + + MMX + + Multiply the packed words in mm by the packed words in mm/m64, add adjacent doubleword results, and store in mm. + + + PMADDWD + xmm1,xmm2/m128 + 66 0F F5 /r + + SSE2 + + Multiply the packed word integers in xmm1 by the packed word integers in xmm2/m128, add adjacent doubleword results, and store in xmm1. + + + VPMADDWD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F5 /r + + AVX + + Multiply the packed word integers in xmm2 by the packed word integers in xmm3/m128, add adjacent doubleword results, and store in xmm1. + + + VPMADDWD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F5 /r + + AVX2 + + Multiply the packed word integers in ymm2 by the packed word integers in ymm3/m256, add adjacent doubleword results, and store in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXSB--Maximum of Packed Signed Byte Integers. + + PMAXSB + xmm1,xmm2/m128 + 66 0F 38 3C /r + + SSE4_1 + + Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + VPMAXSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3C /r + + AVX + + Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3C /r + + AVX2 + + Compare packed signed byte integers in ymm2 and ymm3/m128 and store packed maximum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXSD--Maximum of Packed Signed Dword Integers. + + PMAXSD + xmm1,xmm2/m128 + 66 0F 38 3D /r + + SSE4_1 + + Compare packed signed dword integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + VPMAXSD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3D /r + + AVX + + Compare packed signed dword integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXSD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3D /r + + AVX2 + + Compare packed signed dword integers in ymm2 and ymm3/m128 and store packed maximum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXSW--Maximum of Packed Signed Word Integers. + + PMAXSW + mm1,mm2/m64 + 0F EE /r1 + + SSE + + Compare signed word integers in mm2/m64 and mm1 and return maximum values. + + + PMAXSW + xmm1,xmm2/m128 + 66 0F EE /r + + SSE2 + + Compare signed word integers in xmm2/m128 and xmm1 and return maximum values. + + + VPMAXSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EE /r + + AVX + + Compare packed signed word integers in xmm3/m128 and xmm2 and store packed maximum values in xmm1. + + + VPMAXSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EE /r + + AVX2 + + Compare packed signed word integers in ymm3/m128 and ymm2 and store packed maximum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXUB--Maximum of Packed Unsigned Byte Integers. + + PMAXUB + mm1,mm2/m64 + 0F DE /r1 + + SSE + + Compare unsigned byte integers in mm2/m64 and mm1 and returns maximum values. 
+ + + PMAXUB + xmm1,xmm2/m128 + 66 0F DE /r + + SSE2 + + Compare unsigned byte integers in xmm2/m128 and xmm1 and returns maximum values. + + + VPMAXUB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG DE /r + + AVX + + Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXUB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG DE /r + + AVX2 + + Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXUD--Maximum of Packed Unsigned Dword Integers. + + PMAXUD + xmm1,xmm2/m128 + 66 0F 38 3F /r + + SSE4_1 + + Compare packed unsigned dword integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + VPMAXUD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3F /r + + AVX + + Compare packed unsigned dword integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1. + + + VPMAXUD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3F /r + + AVX2 + + Compare packed unsigned dword integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMAXUW--Maximum of Packed Word Integers. + + PMAXUW + xmm1,xmm2/m128 + 66 0F 38 3E /r + + SSE4_1 + + Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1. + + + VPMAXUW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3E/r + + AVX + + Compare packed unsigned word integers in xmm3/m128 and xmm2 and store maximum packed values in xmm1. + + + VPMAXUW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3E /r + + AVX2 + + Compare packed unsigned word integers in ymm3/m256 and ymm2 and store maximum packed values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINSB--Minimum of Packed Signed Byte Integers. + + PMINSB + xmm1,xmm2/m128 + 66 0F 38 38 /r + + SSE4_1 + + Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + VPMINSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 38 /r + + AVX + + Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 38 /r + + AVX2 + + Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINSD--Minimum of Packed Dword Integers. + + PMINSD + xmm1,xmm2/m128 + 66 0F 38 39 /r + + SSE4_1 + + Compare packed signed dword integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + VPMINSD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 39 /r + + AVX + + Compare packed signed dword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINSD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 39 /r + + AVX2 + + Compare packed signed dword integers in ymm2 and ymm3/m128 and store packed minimum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINSW--Minimum of Packed Signed Word Integers. + + PMINSW + mm1,mm2/m64 + 0F EA /r1 + + SSE + + Compare signed word integers in mm2/m64 and mm1 and return minimum values. 
+ + + PMINSW + xmm1,xmm2/m128 + 66 0F EA /r + + SSE2 + + Compare signed word integers in xmm2/m128 and xmm1 and return minimum values. + + + VPMINSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EA /r + + AVX + + Compare packed signed word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1. + + + VPMINSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EA /r + + AVX2 + + Compare packed signed word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINUB--Minimum of Packed Unsigned Byte Integers. + + PMINUB + mm1,mm2/m64 + 0F DA /r1 + + SSE + + Compare unsigned byte integers in mm2/m64 and mm1 and returns minimum values. + + + PMINUB + xmm1,xmm2/m128 + 66 0F DA /r + + SSE2 + + Compare unsigned byte integers in xmm2/m128 and xmm1 and returns minimum values. + + + VPMINUB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG DA /r + + AVX + + Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINUB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG DA /r + + AVX2 + + Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINUD--Minimum of Packed Dword Integers. + + PMINUD + xmm1,xmm2/m128 + 66 0F 38 3B /r + + SSE4_1 + + Compare packed unsigned dword integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + VPMINUD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3B /r + + AVX + + Compare packed unsigned dword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1. + + + VPMINUD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3B /r + + AVX2 + + Compare packed unsigned dword integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMINUW--Minimum of Packed Word Integers. + + PMINUW + xmm1,xmm2/m128 + 66 0F 38 3A /r + + SSE4_1 + + Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1. + + + VPMINUW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 3A/r + + AVX + + Compare packed unsigned word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1. + + + VPMINUW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 3A /r + + AVX2 + + Compare packed unsigned word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMOVMSKB--Move Byte Mask. + + PMOVMSKB + reg,mm + 0F D7 /r1 + + SSE + + Move a byte mask of mm to reg. The upper bits of r32 or r64 are zeroed. + + + PMOVMSKB + reg,xmm + 66 0F D7 /r + + SSE2 + + Move a byte mask of xmm to reg. The upper bits of r32 or r64 are zeroed. + + + VPMOVMSKB + reg,xmm1 + VEX.128.66.0F.WIG D7 /r + + AVX + + Move a byte mask of xmm1 to reg. The upper bits of r32 or r64 are filled with zeros. + + + VPMOVMSKB + reg,ymm1 + VEX.256.66.0F.WIG D7 /r + + AVX2 + + Move a 32-bit mask of ymm1 to reg. The upper bits of r64 are filled with zeros. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PMOVSX--Packed Move with Sign Extend. 
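Editor's note: the PMOVMSKB rows above collect the most-significant bit of every source byte into a general-purpose register. A small illustrative model, assuming the source is given as a list of byte values; the function name is hypothetical.

# Illustrative model of PMOVMSKB: bit i of the result is the top bit of byte i
# of the source; all higher result bits are zero.
def pmovmskb(src_bytes):
    mask = 0
    for i, b in enumerate(src_bytes):
        mask |= ((b >> 7) & 1) << i
    return mask

# Example: pmovmskb([0x80, 0x00, 0xFF, 0x01]) -> 0b0101 == 5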
+ + PMOVSXBW + xmm1,xmm2/m64 + 66 0f 38 20 /r + + SSE4_1 + + Sign extend 8 packed signed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed signed 16-bit integers in xmm1. + + + PMOVSXBD + xmm1,xmm2/m32 + 66 0f 38 21 /r + + SSE4_1 + + Sign extend 4 packed signed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed signed 32-bit integers in xmm1. + + + PMOVSXBQ + xmm1,xmm2/m16 + 66 0f 38 22 /r + + SSE4_1 + + Sign extend 2 packed signed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed signed 64-bit integers in xmm1. + + + PMOVSXWD + xmm1,xmm2/m64 + 66 0f 38 23 /r + + SSE4_1 + + Sign extend 4 packed signed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed signed 32-bit integers in xmm1. + + + PMOVSXWQ + xmm1,xmm2/m32 + 66 0f 38 24 /r + + SSE4_1 + + Sign extend 2 packed signed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed signed 64-bit integers in xmm1. + + + PMOVSXDQ + xmm1,xmm2/m64 + 66 0f 38 25 /r + + SSE4_1 + + Sign extend 2 packed signed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed signed 64-bit integers in xmm1. + + + VPMOVSXBW + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 20 /r + + AVX + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + VPMOVSXBD + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 21 /r + + AVX + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. + + + VPMOVSXBQ + xmm1,xmm2/m16 + VEX.128.66.0F38.WIG 22 /r + + AVX + + Sign extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXWD + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 23 /r + + AVX + + Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. + + + VPMOVSXWQ + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 24 /r + + AVX + + Sign extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXDQ + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 25 /r + + AVX + + Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVSXBW + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 20 /r + + AVX2 + + Sign extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1. + + + VPMOVSXBD + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 21 /r + + AVX2 + + Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1. + + + VPMOVSXBQ + ymm1,xmm2/m32 + VEX.256.66.0F38.WIG 22 /r + + AVX2 + + Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1. + + + VPMOVSXWD + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 23 /r + + AVX2 + + Sign extend 8 packed 16-bit integers in the low 16 bytes of xmm2/m128 to 8 packed 32. + + + VPMOVSXWQ + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 24 /r + + AVX2 + + Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1. + + + VPMOVSXDQ + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 25 /r + + AVX2 + + Sign extend 4 packed 32-bit integers in the low 16 bytes of xmm2/m128 to 4 packed 64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PMOVZX--Packed Move with Zero Extend. + + PMOVZXBW + xmm1,xmm2/m64 + 66 0f 38 30 /r + + SSE4_1 + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. 
+ + + PMOVZXBD + xmm1,xmm2/m32 + 66 0f 38 31 /r + + SSE4_1 + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. + + + PMOVZXBQ + xmm1,xmm2/m16 + 66 0f 38 32 /r + + SSE4_1 + + Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + PMOVZXWD + xmm1,xmm2/m64 + 66 0f 38 33 /r + + SSE4_1 + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. + + + PMOVZXWQ + xmm1,xmm2/m32 + 66 0f 38 34 /r + + SSE4_1 + + Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + PMOVZXDQ + xmm1,xmm2/m64 + 66 0f 38 35 /r + + SSE4_1 + + Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXBW + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 30 /r + + AVX + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1. + + + VPMOVZXBD + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 31 /r + + AVX + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1. + + + VPMOVZXBQ + xmm1,xmm2/m16 + VEX.128.66.0F38.WIG 32 /r + + AVX + + Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXWD + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 33 /r + + AVX + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1. + + + VPMOVZXWQ + xmm1,xmm2/m32 + VEX.128.66.0F38.WIG 34 /r + + AVX + + Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXDQ + xmm1,xmm2/m64 + VEX.128.66.0F38.WIG 35 /r + + AVX + + Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1. + + + VPMOVZXBW + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 30 /r + + AVX2 + + Zero extend 16 packed 8-bit integers in the low 16 bytes of xmm2/m128 to 16 packed 16-bit integers in ymm1. + + + VPMOVZXBD + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 31 /r + + AVX2 + + Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1. + + + VPMOVZXBQ + ymm1,xmm2/m32 + VEX.256.66.0F38.WIG 32 /r + + AVX2 + + Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1. + + + VPMOVZXWD + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 33 /r + + AVX2 + + Zero extend 8 packed 16-bit integers in the low 16 bytes of xmm2/m128 to 8 packed 32. + + + VPMOVZXWQ + ymm1,xmm2/m64 + VEX.256.66.0F38.WIG 34 /r + + AVX2 + + Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in xmm1. + + + VPMOVZXDQ + ymm1,xmm2/m128 + VEX.256.66.0F38.WIG 35 /r + + AVX2 + + Zero extend 4 packed 32-bit integers in the low 16 bytes of xmm2/m128 to 4 packed 64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + PMULDQ--Multiply Packed Signed Dword Integers. + + PMULDQ + xmm1,xmm2/m128 + 66 0F 38 28 /r + + SSE4_1 + + Multiply the packed signed dword integers in xmm1 and xmm2/m128 and store the quadword product in xmm1. + + + VPMULDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 28 /r + + AVX + + Multiply packed signed doubleword integers in xmm2 by packed signed doubleword integers in xmm3/m128, and store the quadword results in xmm1. 
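Editor's note: the PMOVSX/PMOVZX rows above differ only in whether the widened lanes carry the sign bit. An illustrative byte-to-word sketch of both, with hypothetical names.

# Illustrative models of PMOVSXBW and PMOVZXBW: each low source byte becomes a
# 16-bit word, with or without sign propagation.
def pmovsxbw(src_bytes):
    return [(b - 256 if b >= 128 else b) & 0xFFFF for b in src_bytes]

def pmovzxbw(src_bytes):
    return [b & 0xFFFF for b in src_bytes]

# Example: pmovsxbw([0xF0]) -> [0xFFF0];  pmovzxbw([0xF0]) -> [0x00F0]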
+ + + VPMULDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 28 /r + + AVX2 + + Multiply packed signed doubleword integers in ymm2 by packed signed doubleword integers in ymm3/m256, and store the quadword results in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULHRSW--Packed Multiply High with Round and Scale. + + PMULHRSW + mm1,mm2/m64 + 0F 38 0B /r1 + + SSSE3 + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to mm1. + + + PMULHRSW + xmm1,xmm2/m128 + 66 0F 38 0B /r + + SSSE3 + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1. + + + VPMULHRSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 0B /r + + AVX + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1. + + + VPMULHRSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 0B /r + + AVX2 + + Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULHUW--Multiply Packed Unsigned Integers and Store High Result. + + PMULHUW + mm1,mm2/m64 + 0F E4 /r1 + + SSE + + Multiply the packed unsigned word integers in mm1 register and mm2/m64, and store the high 16 bits of the results in mm1. + + + PMULHUW + xmm1,xmm2/m128 + 66 0F E4 /r + + SSE2 + + Multiply the packed unsigned word integers in xmm1 and xmm2/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHUW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E4 /r + + AVX + + Multiply the packed unsigned word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHUW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG E4 /r + + AVX2 + + Multiply the packed unsigned word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULHW--Multiply Packed Signed Integers and Store High Result. + + PMULHW + mm,mm/m64 + 0F E5 /r1 + + MMX + + Multiply the packed signed word integers in mm1 register and mm2/m64, and store the high 16 bits of the results in mm1. + + + PMULHW + xmm1,xmm2/m128 + 66 0F E5 /r + + SSE2 + + Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E5 /r + + AVX + + Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1. + + + VPMULHW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG E5 /r + + AVX2 + + Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULLD--Multiply Packed Signed Dword Integers and Store Low Result. + + PMULLD + xmm1,xmm2/m128 + 66 0F 38 40 /r + + SSE4_1 + + Multiply the packed dword signed integers in xmm1 and xmm2/m128 and store the low 32 bits of each product in xmm1. + + + VPMULLD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 40 /r + + AVX + + Multiply the packed dword signed integers in xmm2 and xmm3/m128 and store the low 32 bits of each product in xmm1. 
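Editor's note: the PMULHRSW rows above describe a fixed-point (Q15) multiply with rounding. A one-line illustrative model, assuming signed 16-bit inputs and returning the 16-bit result pattern; the name is hypothetical.

# Illustrative model of PMULHRSW: the signed product is scaled by 2**-15 with
# rounding and the low 16 bits of that value are kept.
def pmulhrsw(a, b):
    return ((a * b + 0x4000) >> 15) & 0xFFFF

# Example: pmulhrsw(0x4000, 0x4000) -> 0x2000  (0.5 * 0.5 = 0.25 in Q15)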
+ + + VPMULLD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 40 /r + + AVX2 + + Multiply the packed dword signed integers in ymm2 and ymm3/m256 and store the low 32 bits of each product in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULLW--Multiply Packed Signed Integers and Store Low Result. + + PMULLW + mm,mm/m64 + 0F D5 /r1 + + MMX + + Multiply the packed signed word integers in mm1 register and mm2/m64, and store the low 16 bits of the results in mm1. + + + PMULLW + xmm1,xmm2/m128 + 66 0F D5 /r + + SSE2 + + Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of the results in xmm1. + + + VPMULLW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D5 /r + + AVX + + Multiply the packed dword signed integers in xmm2 and xmm3/m128 and store the low 32 bits of each product in xmm1. + + + VPMULLW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG D5 /r + + AVX2 + + Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the low 16 bits of the results in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PMULUDQ--Multiply Packed Unsigned Doubleword Integers. + + PMULUDQ + mm1,mm2/m64 + 0F F4 /r1 + + SSE2 + + Multiply unsigned doubleword integer in mm1 by unsigned doubleword integer in mm2/m64, and store the quadword result in mm1. + + + PMULUDQ + xmm1,xmm2/m128 + 66 0F F4 /r + + SSE2 + + Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers in xmm2/m128, and store the quadword results in xmm1. + + + VPMULUDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F4 /r + + AVX + + Multiply packed unsigned doubleword integers in xmm2 by packed unsigned doubleword integers in xmm3/m128, and store the quadword results in xmm1. + + + VPMULUDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F4 /r + + AVX2 + + Multiply packed unsigned doubleword integers in ymm2 by packed unsigned doubleword integers in ymm3/m256, and store the quadword results in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + POP--Pop a Value from the Stack. + + POP + r/m16 + 8F /0 + Pop top of stack into m16; increment stack pointer. + + + POP + r/m32 + 8F /0 + Pop top of stack into m32; increment stack pointer. + + + POP + r/m64 + 8F /0 + Pop top of stack into m64; increment stack pointer. Cannot encode 32-bit operand size. + + + POP + r16 + 58+ rw + Pop top of stack into r16; increment stack pointer. + + + POP + r32 + 58+ rd + Pop top of stack into r32; increment stack pointer. + + + POP + r64 + 58+ rd + Pop top of stack into r64; increment stack pointer. Cannot encode 32-bit operand size. + + + POP + DS + 1F + Pop top of stack into DS; increment stack pointer. + + + POP + ES + 07 + Pop top of stack into ES; increment stack pointer. + + + POP + SS + 17 + Pop top of stack into SS; increment stack pointer. + + + POP + FS + 0F A1 + Pop top of stack into FS; increment stack pointer by 16 bits. + + + POP + FS + 0F A1 + Pop top of stack into FS; increment stack pointer by 32 bits. + + + POP + FS + 0F A1 + Pop top of stack into FS; increment stack pointer by 64 bits. + + + POP + GS + 0F A9 + Pop top of stack into GS; increment stack pointer by 16 bits. + + + POP + GS + 0F A9 + Pop top of stack into GS; increment stack pointer by 32 bits. + + + POP + GS + 0F A9 + Pop top of stack into GS; increment stack pointer by 64 bits. 
+ + + ModRM:r/m(w) + NA + NA + NA + + + opcode + rd(w) + NA + NA + NA + + + NA + NA + NA + NA + + + + POPA/POPAD--Pop All General-Purpose Registers. + + POPA + void + 61 + Pop DI, SI, BP, BX, DX, CX, and AX. + + + POPAD + void + 61 + Pop EDI, ESI, EBP, EBX, EDX, ECX, and EAX. + + + NA + NA + NA + NA + + + + POPCNT--Return the Count of Number of Bits Set to 1. + + POPCNT + r16,r/m16 + F3 0F B8 /r + POPCNT on r/m16. + + + POPCNT + r32,r/m32 + F3 0F B8 /r + POPCNT on r/m32. + + + POPCNT + r64,r/m64 + F3 REX.W 0F B8 /r + POPCNT on r/m64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + POPF/POPFD/POPFQ--Pop Stack into EFLAGS Register. + + POPF + void + 9D + Pop top of stack into lower 16 bits of EFLAGS. + + + POPFD + void + 9D + Pop top of stack into EFLAGS. + + + POPFQ + void + 9D + Pop top of stack and zero-extend into RFLAGS. + + + NA + NA + NA + NA + + + + POR--Bitwise Logical OR. + + POR + mm,mm/m64 + 0F EB /r1 + + MMX + + Bitwise OR of mm/m64 and mm. + + + POR + xmm1,xmm2/m128 + 66 0F EB /r + + SSE2 + + Bitwise OR of xmm2/m128 and xmm1. + + + VPOR + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EB /r + + AVX + + Bitwise OR of xmm2/m128 and xmm3. + + + VPOR + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EB /r + + AVX2 + + Bitwise OR of ymm2/m256 and ymm3. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PREFETCHh--Prefetch Data Into Caches. + + PREFETCHT0 + m8 + 0F 18 /1 + Move data from m8 closer to the processor using T0 hint. + + + PREFETCHT1 + m8 + 0F 18 /2 + Move data from m8 closer to the processor using T1 hint. + + + PREFETCHT2 + m8 + 0F 18 /3 + Move data from m8 closer to the processor using T2 hint. + + + PREFETCHNTA + m8 + 0F 18 /0 + Move data from m8 closer to the processor using NTA hint. + + + ModRM:r/m(r) + NA + NA + NA + + + + PREFETCHW--Prefetch Data into Caches in Anticipation of a Write. + + PREFETCHW + m8 + 0F 0D /1 + + PRFCHW + + Move data from m8 closer to the processor in anticipation of a write. + + + ModRM:r/m(r) + NA + NA + NA + + + + PREFETCHWT1--Prefetch Vector Data Into Caches with Intent to Write and T1 Hint. + + PREFETCHWT1 + m8 + 0F 0D /2 + + PREFETCHWT1 + + Move data from m8 closer to the processor using T1 hint with intent to write. + + + ModRM:r/m(r) + NA + NA + NA + + + + PSADBW--Compute Sum of Absolute Differences. + + PSADBW + mm1,mm2/m64 + 0F F6 /r1 + + SSE + + Computes the absolute differences of the packed unsigned byte integers from mm2 /m64 and mm1; differences are then summed to produce an unsigned word integer result. + + + PSADBW + xmm1,xmm2/m128 + 66 0F F6 /r + + SSE2 + + Computes the absolute differences of the packed unsigned byte integers from xmm2 /m128 and xmm1; the 8 low differences and 8 high differences are then summed separately to produce two unsigned word integer results. + + + VPSADBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F6 /r + + AVX + + Computes the absolute differences of the packed unsigned byte integers from xmm3 /m128 and xmm2; the 8 low differences and 8 high differences are then summed separately to produce two unsigned word integer results. + + + VPSADBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F6 /r + + AVX2 + + Computes the absolute differences of the packed unsigned byte integers from ymm3 /m256 and ymm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSHUFB--Packed Shuffle Bytes. 
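Editor's note: the PSADBW rows above sum absolute byte differences into word results. An illustrative model for one 8-byte group, assuming unsigned byte inputs; the name is hypothetical.

# Illustrative model of PSADBW over one 8-byte group: the absolute differences
# of corresponding unsigned bytes are accumulated into a single unsigned word.
def psadbw8(a_bytes, b_bytes):
    return sum(abs(a - b) for a, b in zip(a_bytes, b_bytes)) & 0xFFFF

# Example: psadbw8([10, 0, 5, 5, 1, 2, 3, 4], [0, 10, 5, 5, 4, 3, 2, 1]) -> 28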
+ + PSHUFB + mm1,mm2/m64 + 0F 38 00 /r1 + + SSSE3 + + Shuffle bytes in mm1 according to contents of mm2/m64. + + + PSHUFB + xmm1,xmm2/m128 + 66 0F 38 00 /r + + SSSE3 + + Shuffle bytes in xmm1 according to contents of xmm2/m128. + + + VPSHUFB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 00 /r + + AVX + + Shuffle bytes in xmm2 according to contents of xmm3/m128. + + + VPSHUFB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 00 /r + + AVX2 + + Shuffle bytes in ymm2 according to contents of ymm3/m256. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSHUFD--Shuffle Packed Doublewords. + + PSHUFD + xmm1,xmm2/m128,imm8 + 66 0F 70 /r ib + + SSE2 + + Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFD + xmm1,xmm2/m128,imm8 + VEX.128.66.0F.WIG 70 /r ib + + AVX + + Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFD + ymm1,ymm2/m256,imm8 + VEX.256.66.0F.WIG 70 /r ib + + AVX2 + + Shuffle the doublewords in ymm2/m256 based on the encoding in imm8 and store the result in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + + PSHUFHW--Shuffle Packed High Words. + + PSHUFHW + xmm1,xmm2/m128,imm8 + F3 0F 70 /r ib + + SSE2 + + Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFHW + xmm1,xmm2/m128,imm8 + VEX.128.F3.0F.WIG 70 /r ib + + AVX + + Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFHW + ymm1,ymm2/m256,imm8 + VEX.256.F3.0F.WIG 70 /r ib + + AVX2 + + Shuffle the high words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + + PSHUFLW--Shuffle Packed Low Words. + + PSHUFLW + xmm1,xmm2/m128,imm8 + F2 0F 70 /r ib + + SSE2 + + Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFLW + xmm1,xmm2/m128,imm8 + VEX.128.F2.0F.WIG 70 /r ib + + AVX + + Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. + + + VPSHUFLW + ymm1,ymm2/m256,imm8 + VEX.256.F2.0F.WIG 70 /r ib + + AVX2 + + Shuffle the low words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + + PSHUFW--Shuffle Packed Words. + + PSHUFW + mm1,mm2/m64,imm8 + 0F 70 /r ib + Shuffle the words in mm2/m64 based on the encoding in imm8 and store the result in mm1. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + + PSIGNB/PSIGNW/PSIGND--Packed SIGN. + + PSIGNB + mm1,mm2/m64 + 0F 38 08 /r1 + + SSSE3 + + Negate/zero/preserve packed byte integers in mm1 depending on the corresponding sign in mm2/m64. + + + PSIGNB + xmm1,xmm2/m128 + 66 0F 38 08 /r + + SSSE3 + + Negate/zero/preserve packed byte integers in xmm1 depending on the corresponding sign in xmm2/m128. + + + PSIGNW + mm1,mm2/m64 + 0F 38 09 /r1 + + SSSE3 + + Negate/zero/preserve packed word integers in mm1 depending on the corresponding sign in mm2/m128. + + + PSIGNW + xmm1,xmm2/m128 + 66 0F 38 09 /r + + SSSE3 + + Negate/zero/preserve packed word integers in xmm1 depending on the corresponding sign in xmm2/m128. + + + PSIGND + mm1,mm2/m64 + 0F 38 0A /r1 + + SSSE3 + + Negate/zero/preserve packed doubleword integers in mm1 depending on the corresponding sign in mm2/m128. 
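Editor's note: the PSHUFB rows above describe a byte shuffle driven by a control vector. An illustrative model for the 128-bit form, where a set top bit in a control byte zeroes the lane and the low four bits select the source byte; the name is hypothetical.

# Illustrative model of PSHUFB (16-byte form): each control byte either zeroes
# the result byte (bit 7 set) or selects a source byte by its low 4 bits.
def pshufb(src_bytes, control_bytes):
    out = []
    for c in control_bytes:
        out.append(0 if c & 0x80 else src_bytes[c & 0x0F])
    return out

# Example: pshufb(list(range(16)), [3, 0x80, 1, 0]) -> [3, 0, 1, 0]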
+ + + PSIGND + xmm1,xmm2/m128 + 66 0F 38 0A /r + + SSSE3 + + Negate/zero/preserve packed doubleword integers in xmm1 depending on the corresponding sign in xmm2/m128. + + + VPSIGNB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 08 /r + + AVX + + Negate/zero/preserve packed byte integers in xmm2 depending on the corresponding sign in xmm3/m128. + + + VPSIGNW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 09 /r + + AVX + + Negate/zero/preserve packed word integers in xmm2 depending on the corresponding sign in xmm3/m128. + + + VPSIGND + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.WIG 0A /r + + AVX + + Negate/zero/preserve packed doubleword integers in xmm2 depending on the corresponding sign in xmm3/m128. + + + VPSIGNB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 08 /r + + AVX2 + + Negate packed byte integers in ymm2 if the corresponding sign in ymm3/m256 is less than zero. + + + VPSIGNW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 09 /r + + AVX2 + + Negate packed 16-bit integers in ymm2 if the corresponding sign in ymm3/m256 is less than zero. + + + VPSIGND + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.WIG 0A /r + + AVX2 + + Negate packed doubleword integers in ymm2 if the corresponding sign in ymm3/m256 is less than zero. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSLLDQ--Shift Double Quadword Left Logical. + + PSLLDQ + xmm1,imm8 + 66 0F 73 /7 ib + + SSE2 + + Shift xmm1 left by imm8 bytes while shifting in 0s. + + + VPSLLDQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 73 /7 ib + + AVX + + Shift xmm2 left by imm8 bytes while shifting in 0s and store result in xmm1. + + + VPSLLDQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 73 /7 ib + + AVX2 + + Shift ymm2 left by imm8 bytes while shifting in 0s and store result in ymm1. + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(r) + imm8(r) + NA + + + + PSLLW/PSLLD/PSLLQ--Shift Packed Data Left Logical. + + PSLLW + mm,mm/m64 + 0F F1 /r1 + + MMX + + Shift words in mm left mm/m64 while shifting in 0s. + + + PSLLW + xmm1,xmm2/m128 + 66 0F F1 /r + + SSE2 + + Shift words in xmm1 left by xmm2/m128 while shifting in 0s. + + + PSLLW + mm1,imm8 + 0F 71 /6 ib + + MMX + + Shift words in mm left by imm8 while shifting in 0s. + + + PSLLW + xmm1,imm8 + 66 0F 71 /6 ib + + SSE2 + + Shift words in xmm1 left by imm8 while shifting in 0s. + + + PSLLD + mm,mm/m64 + 0F F2 /r1 + + MMX + + Shift doublewords in mm left by mm/m64 while shifting in 0s. + + + PSLLD + xmm1,xmm2/m128 + 66 0F F2 /r + + SSE2 + + Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s. + + + PSLLD + mm,imm8 + 0F 72 /6 ib1 + + MMX + + Shift doublewords in mm left by imm8 while shifting in 0s. + + + PSLLD + xmm1,imm8 + 66 0F 72 /6 ib + + SSE2 + + Shift doublewords in xmm1 left by imm8 while shifting in 0s. + + + PSLLQ + mm,mm/m64 + 0F F3 /r1 + + MMX + + Shift quadword in mm left by mm/m64 while shifting in 0s. + + + PSLLQ + xmm1,xmm2/m128 + 66 0F F3 /r + + SSE2 + + Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s. + + + PSLLQ + mm,imm8 + 0F 73 /6 ib1 + + MMX + + Shift quadword in mm left by imm8 while shifting in 0s. + + + PSLLQ + xmm1,imm8 + 66 0F 73 /6 ib + + SSE2 + + Shift quadwords in xmm1 left by imm8 while shifting in 0s. + + + VPSLLW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F1 /r + + AVX + + Shift words in xmm2 left by amount specified in xmm3/m128 while shifting in 0s. 
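Editor's note: the PSIGNB/PSIGNW/PSIGND rows above negate, zero, or keep each destination element according to the sign of the matching source element. An illustrative per-element model with hypothetical names.

# Illustrative model of PSIGN: negative source -> negate, zero source -> zero,
# positive source -> keep the destination element unchanged.
def psign(dst_elems, src_elems):
    out = []
    for d, s in zip(dst_elems, src_elems):
        if s < 0:
            out.append(-d)
        elif s == 0:
            out.append(0)
        else:
            out.append(d)
    return out

# Example: psign([1, 2, 3], [-5, 0, 7]) -> [-1, 0, 3]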
+ + + VPSLLW + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 71 /6 ib + + AVX + + Shift words in xmm2 left by imm8 while shifting in 0s. + + + VPSLLD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F2 /r + + AVX + + Shift doublewords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLD + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 72 /6 ib + + AVX + + Shift doublewords in xmm2 left by imm8 while shifting in 0s. + + + VPSLLQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F3 /r + + AVX + + Shift quadwords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 73 /6 ib + + AVX + + Shift quadwords in xmm2 left by imm8 while shifting in 0s. + + + VPSLLW + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG F1 /r + + AVX2 + + Shift words in ymm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLW + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 71 /6 ib + + AVX2 + + Shift words in ymm2 left by imm8 while shifting in 0s. + + + VPSLLD + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG F2 /r + + AVX2 + + Shift doublewords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLD + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 72 /6 ib + + AVX2 + + Shift doublewords in ymm2 left by imm8 while shifting in 0s. + + + VPSLLQ + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG F3 /r + + AVX2 + + Shift quadwords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s. + + + VPSLLQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 73 /6 ib + + AVX2 + + Shift quadwords in ymm2 left by imm8 while shifting in 0s. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + VEX.vvvv(w) + ModRM:r/m(r) + imm8(r) + NA + + + + PSRAW/PSRAD--Shift Packed Data Right Arithmetic. + + PSRAW + mm,mm/m64 + 0F E1 /r1 + + MMX + + Shift words in mm right by mm/m64 while shifting in sign bits. + + + PSRAW + xmm1,xmm2/m128 + 66 0F E1 /r + + SSE2 + + Shift words in xmm1 right by xmm2/m128 while shifting in sign bits. + + + PSRAW + mm,imm8 + 0F 71 /4 ib1 + + MMX + + Shift words in mm right by imm8 while shifting in sign bits. + + + PSRAW + xmm1,imm8 + 66 0F 71 /4 ib + + SSE2 + + Shift words in xmm1 right by imm8 while shifting in sign bits. + + + PSRAD + mm,mm/m64 + 0F E2 /r1 + + MMX + + Shift doublewords in mm right by mm/m64 while shifting in sign bits. + + + PSRAD + xmm1,xmm2/m128 + 66 0F E2 /r + + SSE2 + + Shift doubleword in xmm1 right by xmm2 /m128 while shifting in sign bits. + + + PSRAD + mm,imm8 + 0F 72 /4 ib1 + + MMX + + Shift doublewords in mm right by imm8 while shifting in sign bits. + + + PSRAD + xmm1,imm8 + 66 0F 72 /4 ib + + SSE2 + + Shift doublewords in xmm1 right by imm8 while shifting in sign bits. + + + VPSRAW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E1 /r + + AVX + + Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits. + + + VPSRAW + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 71 /4 ib + + AVX + + Shift words in xmm2 right by imm8 while shifting in sign bits. + + + VPSRAD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E2 /r + + AVX + + Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits. + + + VPSRAD + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 72 /4 ib + + AVX + + Shift doublewords in xmm2 right by imm8 while shifting in sign bits. 
+ + + VPSRAW + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG E1 /r + + AVX2 + + Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits. + + + VPSRAW + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 71 /4 ib + + AVX2 + + Shift words in ymm2 right by imm8 while shifting in sign bits. + + + VPSRAD + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG E2 /r + + AVX2 + + Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits. + + + VPSRAD + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 72 /4 ib + + AVX2 + + Shift doublewords in ymm2 right by imm8 while shifting in sign bits. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + VEX.vvvv(w) + ModRM:r/m(r) + imm8(r) + NA + + + + PSRLDQ--Shift Double Quadword Right Logical. + + PSRLDQ + xmm1,imm8 + 66 0F 73 /3 ib + + SSE2 + + Shift xmm1 right by imm8 while shifting in 0s. + + + VPSRLDQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 73 /3 ib + + AVX + + Shift xmm2 right by imm8 bytes while shifting in 0s. + + + VPSRLDQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 73 /3 ib + + AVX2 + + Shift ymm1 right by imm8 bytes while shifting in 0s. + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + VEX.vvvv(w) + ModRM:r/m(r) + imm8(r) + NA + + + + PSRLW/PSRLD/PSRLQ--Shift Packed Data Right Logical. + + PSRLW + mm,mm/m64 + 0F D1 /r1 + + MMX + + Shift words in mm right by amount specified in mm/m64 while shifting in 0s. + + + PSRLW + xmm1,xmm2/m128 + 66 0F D1 /r + + SSE2 + + Shift words in xmm1 right by amount specified in xmm2/m128 while shifting in 0s. + + + PSRLW + mm,imm8 + 0F 71 /2 ib1 + + MMX + + Shift words in mm right by imm8 while shifting in 0s. + + + PSRLW + xmm1,imm8 + 66 0F 71 /2 ib + + SSE2 + + Shift words in xmm1 right by imm8 while shifting in 0s. + + + PSRLD + mm,mm/m64 + 0F D2 /r1 + + MMX + + Shift doublewords in mm right by amount specified in mm/m64 while shifting in 0s. + + + PSRLD + xmm1,xmm2/m128 + 66 0F D2 /r + + SSE2 + + Shift doublewords in xmm1 right by amount specified in xmm2 /m128 while shifting in 0s. + + + PSRLD + mm,imm8 + 0F 72 /2 ib1 + + MMX + + Shift doublewords in mm right by imm8 while shifting in 0s. + + + PSRLD + xmm1,imm8 + 66 0F 72 /2 ib + + SSE2 + + Shift doublewords in xmm1 right by imm8 while shifting in 0s. + + + PSRLQ + mm,mm/m64 + 0F D3 /r1 + + MMX + + Shift mm right by amount specified in mm/m64 while shifting in 0s. + + + PSRLQ + xmm1,xmm2/m128 + 66 0F D3 /r + + SSE2 + + Shift quadwords in xmm1 right by amount specified in xmm2/m128 while shifting in 0s. + + + PSRLQ + mm,imm8 + 0F 73 /2 ib1 + + MMX + + Shift mm right by imm8 while shifting in 0s. + + + PSRLQ + xmm1,imm8 + 66 0F 73 /2 ib + + SSE2 + + Shift quadwords in xmm1 right by imm8 while shifting in 0s. + + + VPSRLW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D1 /r + + AVX + + Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLW + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 71 /2 ib + + AVX + + Shift words in xmm2 right by imm8 while shifting in 0s. + + + VPSRLD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D2 /r + + AVX + + Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLD + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 72 /2 ib + + AVX + + Shift doublewords in xmm2 right by imm8 while shifting in 0s. 
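Editor's note: the PSRAW/PSRAD and PSRLW/PSRLD/PSRLQ rows above differ in what is shifted in from the left. An illustrative single-lane sketch contrasting the two, assuming 16-bit lanes; names are hypothetical.

# Illustrative models of one 16-bit lane: PSRAW replicates the sign bit,
# PSRLW shifts in zeros; counts of 16 or more saturate accordingly.
def psraw_lane(value, count):
    signed = value - 0x10000 if value & 0x8000 else value
    return (signed >> min(count, 15)) & 0xFFFF

def psrlw_lane(value, count):
    return (value >> count) & 0xFFFF if count < 16 else 0

# Example: psraw_lane(0x8000, 1) -> 0xC000;  psrlw_lane(0x8000, 1) -> 0x4000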
+ + + VPSRLQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D3 /r + + AVX + + Shift quadwords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLQ + xmm1,xmm2,imm8 + VEX.NDD.128.66.0F.WIG 73 /2 ib + + AVX + + Shift quadwords in xmm2 right by imm8 while shifting in 0s. + + + VPSRLW + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG D1 /r + + AVX2 + + Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLW + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 71 /2 ib + + AVX2 + + Shift words in ymm2 right by imm8 while shifting in 0s. + + + VPSRLD + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG D2 /r + + AVX2 + + Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLD + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 72 /2 ib + + AVX2 + + Shift doublewords in ymm2 right by imm8 while shifting in 0s. + + + VPSRLQ + ymm1,ymm2,xmm3/m128 + VEX.NDS.256.66.0F.WIG D3 /r + + AVX2 + + Shift quadwords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s. + + + VPSRLQ + ymm1,ymm2,imm8 + VEX.NDD.256.66.0F.WIG 73 /2 ib + + AVX2 + + Shift quadwords in ymm2 right by imm8 while shifting in 0s. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + VEX.vvvv(w) + ModRM:r/m(r) + imm8(r) + NA + + + + PSUBB/PSUBW/PSUBD--Subtract Packed Integers. + + PSUBB + mm,mm/m64 + 0F F8 /r1 + + MMX + + Subtract packed byte integers in mm/m64 from packed byte integers in mm. + + + PSUBB + xmm1,xmm2/m128 + 66 0F F8 /r + + SSE2 + + Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1. + + + PSUBW + mm,mm/m64 + 0F F9 /r1 + + MMX + + Subtract packed word integers in mm/m64 from packed word integers in mm. + + + PSUBW + xmm1,xmm2/m128 + 66 0F F9 /r + + SSE2 + + Subtract packed word integers in xmm2/m128 from packed word integers in xmm1. + + + PSUBD + mm,mm/m64 + 0F FA /r1 + + MMX + + Subtract packed doubleword integers in mm/m64 from packed doubleword integers in mm. + + + PSUBD + xmm1,xmm2/m128 + 66 0F FA /r + + SSE2 + + Subtract packed doubleword integers in xmm2/mem128 from packed doubleword integers in xmm1. + + + VPSUBB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F8 /r + + AVX + + Subtract packed byte integers in xmm3/m128 from xmm2. + + + VPSUBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG F9 /r + + AVX + + Subtract packed word integers in xmm3/m128 from xmm2. + + + VPSUBD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FA /r + + AVX + + Subtract packed doubleword integers in xmm3/m128 from xmm2. + + + VPSUBB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F8 /r + + AVX2 + + Subtract packed byte integers in ymm3/m256 from ymm2. + + + VPSUBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG F9 /r + + AVX2 + + Subtract packed word integers in ymm3/m256 from ymm2. + + + VPSUBD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FA /r + + AVX2 + + Subtract packed doubleword integers in ymm3/m256 from ymm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSUBQ--Subtract Packed Quadword Integers. + + PSUBQ + mm1,mm2/m64 + 0F FB /r1 + + SSE2 + + Subtract quadword integer in mm1 from mm2 /m64. + + + PSUBQ + xmm1,xmm2/m128 + 66 0F FB /r + + SSE2 + + Subtract packed quadword integers in xmm1 from xmm2 /m128. + + + VPSUBQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG FB/r + + AVX + + Subtract packed quadword integers in xmm3/m128 from xmm2. 
+ + + VPSUBQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG FB /r + + AVX2 + + Subtract packed quadword integers in ymm3/m256 from ymm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSUBSB/PSUBSW--Subtract Packed Signed Integers with Signed Saturation. + + PSUBSB + mm,mm/m64 + 0F E8 /r1 + + MMX + + Subtract signed packed bytes in mm/m64 from signed packed bytes in mm and saturate results. + + + PSUBSB + xmm1,xmm2/m128 + 66 0F E8 /r + + SSE2 + + Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1 and saturate results. + + + PSUBSW + mm,mm/m64 + 0F E9 /r1 + + MMX + + Subtract signed packed words in mm/m64 from signed packed words in mm and saturate results. + + + PSUBSW + xmm1,xmm2/m128 + 66 0F E9 /r + + SSE2 + + Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1 and saturate results. + + + VPSUBSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E8 /r + + AVX + + Subtract packed signed byte integers in xmm3/m128 from packed signed byte integers in xmm2 and saturate results. + + + VPSUBSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG E9 /r + + AVX + + Subtract packed signed word integers in xmm3/m128 from packed signed word integers in xmm2 and saturate results. + + + VPSUBSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG E8 /r + + AVX2 + + Subtract packed signed byte integers in ymm3/m256 from packed signed byte integers in ymm2 and saturate results. + + + VPSUBSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG E9 /r + + AVX2 + + Subtract packed signed word integers in ymm3/m256 from packed signed word integers in ymm2 and saturate results. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PSUBUSB/PSUBUSW--Subtract Packed Unsigned Integers with Unsigned Saturation. + + PSUBUSB + mm,mm/m64 + 0F D8 /r1 + + MMX + + Subtract unsigned packed bytes in mm/m64 from unsigned packed bytes in mm and saturate result. + + + PSUBUSB + xmm1,xmm2/m128 + 66 0F D8 /r + + SSE2 + + Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1 and saturate result. + + + PSUBUSW + mm,mm/m64 + 0F D9 /r1 + + MMX + + Subtract unsigned packed words in mm/m64 from unsigned packed words in mm and saturate result. + + + PSUBUSW + xmm1,xmm2/m128 + 66 0F D9 /r + + SSE2 + + Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1 and saturate result. + + + VPSUBUSB + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D8 /r + + AVX + + Subtract packed unsigned byte integers in xmm3/m128 from packed unsigned byte integers in xmm2 and saturate result. + + + VPSUBUSW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG D9 /r + + AVX + + Subtract packed unsigned word integers in xmm3/m128 from packed unsigned word integers in xmm2 and saturate result. + + + VPSUBUSB + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG D8 /r + + AVX2 + + Subtract packed unsigned byte integers in ymm3/m256 from packed unsigned byte integers in ymm2 and saturate result. + + + VPSUBUSW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG D9 /r + + AVX2 + + Subtract packed unsigned word integers in ymm3/m256 from packed unsigned word integers in ymm2 and saturate result. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PTEST--Logical Compare. + + PTEST + xmm1,xmm2/m128 + 66 0F 38 17 /r + + SSE4_1 + + Set ZF if xmm2/m128 AND xmm1 result is all 0s. 
Set CF if xmm2/m128 AND NOT xmm1 result is all 0s. + + + VPTEST + xmm1,xmm2/m128 + VEX.128.66.0F38.WIG 17 /r + + AVX + + Set ZF and CF depending on bitwise AND and ANDN of sources. + + + VPTEST + ymm1,ymm2/m256 + VEX.256.66.0F38.WIG 17 /r + + AVX + + Set ZF and CF depending on bitwise AND and ANDN of sources. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + + PUNPCKHBW/PUNPCKHWD/PUNPCKHDQ/PUNPCKHQDQ--Unpack High Data. + + PUNPCKHBW + mm,mm/m64 + 0F 68 /r1 + + MMX + + Unpack and interleave high-order bytes from mm and mm/m64 into mm. + + + PUNPCKHBW + xmm1,xmm2/m128 + 66 0F 68 /r + + SSE2 + + Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKHWD + mm,mm/m64 + 0F 69 /r1 + + MMX + + Unpack and interleave high-order words from mm and mm/m64 into mm. + + + PUNPCKHWD + xmm1,xmm2/m128 + 66 0F 69 /r + + SSE2 + + Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKHDQ + mm,mm/m64 + 0F 6A /r1 + + MMX + + Unpack and interleave high-order doublewords from mm and mm/m64 into mm. + + + PUNPCKHDQ + xmm1,xmm2/m128 + 66 0F 6A /r + + SSE2 + + Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKHQDQ + xmm1,xmm2/m128 + 66 0F 6D /r + + SSE2 + + Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1. + + + VPUNPCKHBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 68/r + + AVX + + Interleave high-order bytes from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKHWD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 69/r + + AVX + + Interleave high-order words from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKHDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 6A/r + + AVX + + Interleave high-order doublewords from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKHQDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 6D/r + + AVX + + Interleave high-order quadword from xmm2 and xmm3/m128 into xmm1 register. + + + VPUNPCKHBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 68 /r + + AVX2 + + Interleave high-order bytes from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHWD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 69 /r + + AVX2 + + Interleave high-order words from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 6A /r + + AVX2 + + Interleave high-order doublewords from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKHQDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 6D /r + + AVX2 + + Interleave high-order quadword from ymm2 and ymm3/m256 into ymm1 register. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PUNPCKLBW/PUNPCKLWD/PUNPCKLDQ/PUNPCKLQDQ--Unpack Low Data. + + PUNPCKLBW + mm,mm/m32 + 0F 60 /r1 + + MMX + + Interleave low-order bytes from mm and mm/m32 into mm. + + + PUNPCKLBW + xmm1,xmm2/m128 + 66 0F 60 /r + + SSE2 + + Interleave low-order bytes from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKLWD + mm,mm/m32 + 0F 61 /r1 + + MMX + + Interleave low-order words from mm and mm/m32 into mm. + + + PUNPCKLWD + xmm1,xmm2/m128 + 66 0F 61 /r + + SSE2 + + Interleave low-order words from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKLDQ + mm,mm/m32 + 0F 62 /r1 + + MMX + + Interleave low-order doublewords from mm and mm/m32 into mm. + + + PUNPCKLDQ + xmm1,xmm2/m128 + 66 0F 62 /r + + SSE2 + + Interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1. + + + PUNPCKLQDQ + xmm1,xmm2/m128 + 66 0F 6C /r + + SSE2 + + Interleave low-order quadword from xmm1 and xmm2/m128 into xmm1 register. 
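Editor's note: the PTEST rows above define the instruction purely in terms of flag results. An illustrative model of that flag behaviour, assuming the operands are given as integers of the stated width; the name is hypothetical.

# Illustrative model of PTEST: ZF is set when (src AND dst) is all zeros,
# CF when (src AND NOT dst) is all zeros.
def ptest(dst, src, width=128):
    mask = (1 << width) - 1
    zf = (src & dst) == 0
    cf = (src & ~dst & mask) == 0
    return zf, cf

# Example: ptest(0xF0, 0x0F) -> (True, False)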
+ + + VPUNPCKLBW + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 60/r + + AVX + + Interleave low-order bytes from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKLWD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 61/r + + AVX + + Interleave low-order words from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKLDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 62/r + + AVX + + Interleave low-order doublewords from xmm2 and xmm3/m128 into xmm1. + + + VPUNPCKLQDQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 6C/r + + AVX + + Interleave low-order quadword from xmm2 and xmm3/m128 into xmm1 register. + + + VPUNPCKLBW + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 60 /r + + AVX2 + + Interleave low-order bytes from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKLWD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 61 /r + + AVX2 + + Interleave low-order words from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKLDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 62 /r + + AVX2 + + Interleave low-order doublewords from ymm2 and ymm3/m256 into ymm1 register. + + + VPUNPCKLQDQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 6C /r + + AVX2 + + Interleave low-order quadword from ymm2 and ymm3/m256 into ymm1 register. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + PUSH--Push Word, Doubleword or Quadword Onto the Stack. + + PUSH + r/m16 + FF /6 + Push r/m16. + + + PUSH + r/m32 + FF /6 + Push r/m32. + + + PUSH + r/m64 + FF /6 + Push r/m64. + + + PUSH + r16 + 50+rw + Push r16. + + + PUSH + r32 + 50+rd + Push r32. + + + PUSH + r64 + 50+rd + Push r64. + + + PUSH + imm8 + 6A ib + Push imm8. + + + PUSH + imm16 + 68 iw + Push imm16. + + + PUSH + imm32 + 68 id + Push imm32. + + + PUSH + CS + 0E + Push CS. + + + PUSH + SS + 16 + Push SS. + + + PUSH + DS + 1E + Push DS. + + + PUSH + ES + 06 + Push ES. + + + PUSH + FS + 0F A0 + Push FS. + + + PUSH + GS + 0F A8 + Push GS. + + + ModRM:r/m(r) + NA + NA + NA + + + opcode + rd(r) + NA + NA + NA + + + imm8(r)/16/32 + NA + NA + NA + + + NA + NA + NA + NA + + + + PUSHA/PUSHAD--Push All General-Purpose Registers. + + PUSHA + void + 60 + Push AX, CX, DX, BX, original SP, BP, SI, and DI. + + + PUSHAD + void + 60 + Push EAX, ECX, EDX, EBX, original ESP, EBP, ESI, and EDI. + + + NA + NA + NA + NA + + + + PUSHF/PUSHFD--Push EFLAGS Register onto the Stack. + + PUSHF + void + 9C + Push lower 16 bits of EFLAGS. + + + PUSHFD + void + 9C + Push EFLAGS. + + + PUSHFQ + void + 9C + Push RFLAGS. + + + NA + NA + NA + NA + + + + PXOR--Logical Exclusive OR. + + PXOR + mm,mm/m64 + 0F EF /r1 + + MMX + + Bitwise XOR of mm/m64 and mm. + + + PXOR + xmm1,xmm2/m128 + 66 0F EF /r + + SSE2 + + Bitwise XOR of xmm2/m128 and xmm1. + + + VPXOR + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG EF /r + + AVX + + Bitwise XOR of xmm3/m128 and xmm2. + + + VPXOR + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG EF /r + + AVX2 + + Bitwise XOR of ymm3/m256 and ymm2. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + RCL/RCR/ROL/ROR---Rotate. + + RCL + r/m8,1 + D0 /2 + Rotate 9 bits (CF, r/m8) left once. + + + RCL + r/m8*,1 + REX + D0 /2 + Rotate 9 bits (CF, r/m8) left once. + + + RCL + r/m8,CL + D2 /2 + Rotate 9 bits (CF, r/m8) left CL times. + + + RCL + r/m8*,CL + REX + D2 /2 + Rotate 9 bits (CF, r/m8) left CL times. + + + RCL + r/m8,imm8 + C0 /2 ib + Rotate 9 bits (CF, r/m8) left imm8 times. + + + RCL + r/m8*,imm8 + REX + C0 /2 ib + Rotate 9 bits (CF, r/m8) left imm8 times. 
+ + + RCL + r/m16,1 + D1 /2 + Rotate 17 bits (CF, r/m16) left once. + + + RCL + r/m16,CL + D3 /2 + Rotate 17 bits (CF, r/m16) left CL times. + + + RCL + r/m16,imm8 + C1 /2 ib + Rotate 17 bits (CF, r/m16) left imm8 times. + + + RCL + r/m32,1 + D1 /2 + Rotate 33 bits (CF, r/m32) left once. + + + RCL + r/m64,1 + REX.W + D1 /2 + Rotate 65 bits (CF, r/m64) left once. Uses a 6. + + + RCL + r/m32,CL + D3 /2 + Rotate 33 bits (CF, r/m32) left CL times. + + + RCL + r/m64,CL + REX.W + D3 /2 + Rotate 65 bits (CF, r/m64) left CL times. Uses a 6 bit count. + + + RCL + r/m32,imm8 + C1 /2 ib + Rotate 33 bits (CF, r/m32) left imm8 times. + + + RCL + r/m64,imm8 + REX.W + C1 /2 ib + Rotate 65 bits (CF, r/m64) left imm8 times. Uses a 6 bit count. + + + RCR + r/m8,1 + D0 /3 + Rotate 9 bits (CF, r/m8) right once. + + + RCR + r/m8*,1 + REX + D0 /3 + Rotate 9 bits (CF, r/m8) right once. + + + RCR + r/m8,CL + D2 /3 + Rotate 9 bits (CF, r/m8) right CL times. + + + RCR + r/m8*,CL + REX + D2 /3 + Rotate 9 bits (CF, r/m8) right CL times. + + + RCR + r/m8,imm8 + C0 /3 ib + Rotate 9 bits (CF, r/m8) right imm8 times. + + + RCR + r/m8*,imm8 + REX + C0 /3 ib + Rotate 9 bits (CF, r/m8) right imm8 times. + + + RCR + r/m16,1 + D1 /3 + Rotate 17 bits (CF, r/m16) right once. + + + RCR + r/m16,CL + D3 /3 + Rotate 17 bits (CF, r/m16) right CL times. + + + RCR + r/m16,imm8 + C1 /3 ib + Rotate 17 bits (CF, r/m16) right imm8 times. + + + RCR + r/m32,1 + D1 /3 + Rotate 33 bits (CF, r/m32) right once. Uses a 6. + + + RCR + r/m64,1 + REX.W + D1 /3 + Rotate 65 bits (CF, r/m64) right once. Uses a 6. + + + RCR + r/m32,CL + D3 /3 + Rotate 33 bits (CF, r/m32) right CL times. + + + RCR + r/m64,CL + REX.W + D3 /3 + Rotate 65 bits (CF, r/m64) right CL times. Uses a 6 bit count. + + + RCR + r/m32,imm8 + C1 /3 ib + Rotate 33 bits (CF, r/m32) right imm8 times. + + + RCR + r/m64,imm8 + REX.W + C1 /3 ib + Rotate 65 bits (CF, r/m64) right imm8 times. Uses a 6 bit count. + + + ROL + r/m8,1 + D0 /0 + Rotate 8 bits r/m8 left once. + + + ROL + r/m8*,1 + REX + D0 /0 + Rotate 8 bits r/m8 left once. + + + ROL + r/m8,CL + D2 /0 + Rotate 8 bits r/m8 left CL times. + + + ROL + r/m8*,CL + REX + D2 /0 + Rotate 8 bits r/m8 left CL times. + + + ROL + r/m8,imm8 + C0 /0 ib + Rotate 8 bits r/m8 left imm8 times. + + + ROL + r/m8*,imm8 + REX + C0 /0 ib + Rotate 8 bits r/m8 left imm8 times. + + + ROL + r/m16,1 + D1 /0 + Rotate 16 bits r/m16 left once. + + + ROL + r/m16,CL + D3 /0 + Rotate 16 bits r/m16 left CL times. + + + ROL + r/m16,imm8 + C1 /0 ib + Rotate 16 bits r/m16 left imm8 times. + + + ROL + r/m32,1 + D1 /0 + Rotate 32 bits r/m32 left once. + + + ROL + r/m64,1 + REX.W + D1 /0 + Rotate 64 bits r/m64 left once. Uses a 6 bit count. + + + ROL + r/m32,CL + D3 /0 + Rotate 32 bits r/m32 left CL times. + + + ROL + r/m64,CL + REX.W + D3 /0 + Rotate 64 bits r/m64 left CL times. Uses a 6. + + + ROL + r/m32,imm8 + C1 /0 ib + Rotate 32 bits r/m32 left imm8 times. + + + ROL + r/m64,imm8 + REX.W + C1 /0 ib + Rotate 64 bits r/m64 left imm8 times. Uses a 6 bit count. + + + ROR + r/m8,1 + D0 /1 + Rotate 8 bits r/m8 right once. + + + ROR + r/m8*,1 + REX + D0 /1 + Rotate 8 bits r/m8 right once. + + + ROR + r/m8,CL + D2 /1 + Rotate 8 bits r/m8 right CL times. + + + ROR + r/m8*,CL + REX + D2 /1 + Rotate 8 bits r/m8 right CL times. + + + ROR + r/m8,imm8 + C0 /1 ib + Rotate 8 bits r/m16 right imm8 times. + + + ROR + r/m8*,imm8 + REX + C0 /1 ib + Rotate 8 bits r/m16 right imm8 times. + + + ROR + r/m16,1 + D1 /1 + Rotate 16 bits r/m16 right once. 
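Editor's note: the RCL rows above rotate the operand together with CF, i.e. a 9-bit rotate for an 8-bit operand. An illustrative model of the 8-bit case, assuming the count is first masked to 5 bits and then reduced mod 9 as the descriptions imply; the name is hypothetical.

# Illustrative model of RCL r/m8: the value and CF form a 9-bit quantity that
# is rotated left by the effective count; returns (new value, new CF).
def rcl8(value, cf, count):
    combined = ((cf & 1) << 8) | (value & 0xFF)
    count = (count & 0x1F) % 9
    combined = ((combined << count) | (combined >> (9 - count))) & 0x1FF
    return combined & 0xFF, (combined >> 8) & 1

# Example: rcl8(0b1000_0000, 0, 1) -> (0, 1)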
+ + + ROR + r/m16,CL + D3 /1 + Rotate 16 bits r/m16 right CL times. + + + ROR + r/m16,imm8 + C1 /1 ib + Rotate 16 bits r/m16 right imm8 times. + + + ROR + r/m32,1 + D1 /1 + Rotate 32 bits r/m32 right once. + + + ROR + r/m64,1 + REX.W + D1 /1 + Rotate 64 bits r/m64 right once. Uses a 6 bit count. + + + ROR + r/m32,CL + D3 /1 + Rotate 32 bits r/m32 right CL times. + + + ROR + r/m64,CL + REX.W + D3 /1 + Rotate 64 bits r/m64 right CL times. Uses a 6. + + + ROR + r/m32,imm8 + C1 /1 ib + Rotate 32 bits r/m32 right imm8 times. + + + ROR + r/m64,imm8 + REX.W + C1 /1 ib + Rotate 64 bits r/m64 right imm8 times. Uses a 6 bit count. + + + ModRM:r/m(w) + 1 + NA + NA + + + ModRM:r/m(w) + CL + NA + NA + + + ModRM:r/m(w) + imm8(r) + NA + NA + + + + RCPPS--Compute Reciprocals of Packed Single-Precision Floating-Point Values. + + RCPPS + xmm1,xmm2/m128 + 0F 53 /r + + SSE + + Computes the approximate reciprocals of the packed single-precision floating-point values in xmm2/m128 and stores the results in xmm1. + + + VRCPPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 53 /r + + AVX + + Computes the approximate reciprocals of packed single-precision values in xmm2/mem and stores the results in xmm1. + + + VRCPPS + ymm1,ymm2/m256 + VEX.256.0F.WIG 53 /r + + AVX + + Computes the approximate reciprocals of packed single-precision values in ymm2/mem and stores the results in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + RCPSS--Compute Reciprocal of Scalar Single-Precision Floating-Point Values. + + RCPSS + xmm1,xmm2/m32 + F3 0F 53 /r + + SSE + + Computes the approximate reciprocal of the scalar single-precision floating-point value in xmm2/m32 and stores the result in xmm1. + + + VRCPSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.LIG.F3.0F.WIG 53 /r + + AVX + + Computes the approximate reciprocal of the scalar single-precision floating-point value in xmm3/m32 and stores the result in xmm1. Also, upper single precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + RDFSBASE/RDGSBASE--Read FS/GS Segment Base. + + RDFSBASE + r32 + F3 0F AE /0 + + FSGSBASE + + Load the 32-bit destination register with the FS base address. + + + RDFSBASE + r64 + F3 REX.W 0F AE /0 + + FSGSBASE + + Load the 64-bit destination register with the FS base address. + + + RDGSBASE + r32 + F3 0F AE /1 + + FSGSBASE + + Load the 32-bit destination register with the GS base address. + + + RDGSBASE + r64 + F3 REX.W 0F AE /1 + + FSGSBASE + + Load the 64-bit destination register with the GS base address. + + + ModRM:r/m(w) + NA + NA + NA + + + + RDMSR--Read from Model Specific Register. + + RDMSR + void + 0F 32 + Read MSR specified by ECX into EDX:EAX. + + + NA + NA + NA + NA + + + + RDPKRU--Read Protection Key Rights for User Pages. + + RDPKRU + void + 0F 01 EE + + OSPKE + + Reads PKRU into EAX. + + + NA + NA + NA + NA + + + + RDPMC--Read Performance-Monitoring Counters. + + RDPMC + void + 0F 33 + Read performance-monitoring counter specified by ECX into EDX:EAX. + + + NA + NA + NA + NA + + + + RDRAND--Read Random Number. + + RDRAND + r16 + 0F C7 /6 + + RDRAND + + Read a 16-bit random number and store in the destination register. + + + RDRAND + r32 + 0F C7 /6 + + RDRAND + + Read a 32-bit random number and store in the destination register. + + + RDRAND + r64 + REX.W + 0F C7 /6 + + RDRAND + + Read a 64-bit random number and store in the destination register. + + + ModRM:r/m(w) + NA + NA + NA + + + + RDSEED--Read Random SEED. 
+ + RDSEED + r16 + 0F C7 /7 + + RDSEED + + Read a 16-bit NIST SP800-90B & C compliant random value and store in the destination register. + + + RDSEED + r32 + 0F C7 /7 + + RDSEED + + Read a 32-bit NIST SP800-90B & C compliant random value and store in the destination register. + + + RDSEED + r64 + REX.W + 0F C7 /7 + + RDSEED + + Read a 64-bit NIST SP800-90B & C compliant random value and store in the destination register. + + + ModRM:r/m(w) + NA + NA + NA + + + + RDTSC--Read Time-Stamp Counter. + + RDTSC + void + 0F 31 + Read time-stamp counter into EDX:EAX. + + + NA + NA + NA + NA + + + + RDTSCP--Read Time-Stamp Counter and Processor ID. + + RDTSCP + void + 0F 01 F9 + Read 64-bit time-stamp counter and 32-bit IA32_TSC_AUX value into EDX:EAX and ECX. + + + NA + NA + NA + NA + + + + REP/REPE/REPZ/REPNE/REPNZ--Repeat String Operation Prefix. + + REP + INS m8,DX + F3 6C + Input (E)CX bytes from port DX into ES:[(E)DI]. + + + REP + INS m8,DX + F3 6C + Input RCX bytes from port DX into [RDI]. + + + REP + INS m16,DX + F3 6D + Input (E)CX words from port DX into ES:[(E)DI.]. + + + REP + INS m32,DX + F3 6D + Input (E)CX doublewords from port DX into ES:[(E)DI]. + + + REP + INS r/m32,DX + F3 6D + Input RCX default size from port DX into [RDI]. + + + REP + MOVS m8,m8 + F3 A4 + Move (E)CX bytes from DS:[(E)SI] to ES:[(E)DI]. + + + REP + MOVS m8,m8 + F3 REX.W A4 + Move RCX bytes from [RSI] to [RDI]. + + + REP + MOVS m16,m16 + F3 A5 + Move (E)CX words from DS:[(E)SI] to ES:[(E)DI]. + + + REP + MOVS m32,m32 + F3 A5 + Move (E)CX doublewords from DS:[(E)SI] to ES:[(E)DI]. + + + REP + MOVS m64,m64 + F3 REX.W A5 + Move RCX quadwords from [RSI] to [RDI]. + + + REP + OUTS DX,r/m8 + F3 6E + Output (E)CX bytes from DS:[(E)SI] to port DX. + + + REP + OUTS DX,r/m8* + F3 REX.W 6E + Output RCX bytes from [RSI] to port DX. + + + REP + OUTS DX,r/m16 + F3 6F + Output (E)CX words from DS:[(E)SI] to port DX. + + + REP + OUTS DX,r/m32 + F3 6F + Output (E)CX doublewords from DS:[(E)SI] to port DX. + + + REP + OUTS DX,r/m32 + F3 REX.W 6F + Output RCX default size from [RSI] to port DX. + + + REP + LODS AL + F3 AC + Load (E)CX bytes from DS:[(E)SI] to AL. + + + REP + LODS AL + F3 REX.W AC + Load RCX bytes from [RSI] to AL. + + + REP + LODS AX + F3 AD + Load (E)CX words from DS:[(E)SI] to AX. + + + REP + LODS EAX + F3 AD + Load (E)CX doublewords from DS:[(E)SI] to EAX. + + + REP + LODS RAX + F3 REX.W AD + Load RCX quadwords from [RSI] to RAX. + + + REP + STOS m8 + F3 AA + Fill (E)CX bytes at ES:[(E)DI] with AL. + + + REP + STOS m8 + F3 REX.W AA + Fill RCX bytes at [RDI] with AL. + + + REP + STOS m16 + F3 AB + Fill (E)CX words at ES:[(E)DI] with AX. + + + REP + STOS m32 + F3 AB + Fill (E)CX doublewords at ES:[(E)DI] with EAX. + + + REP + STOS m64 + F3 REX.W AB + Fill RCX quadwords at [RDI] with RAX. + + + REPE + CMPS m8,m8 + F3 A6 + Find nonmatching bytes in ES:[(E)DI] and DS:[(E)SI]. + + + REPE + CMPS m8,m8 + F3 REX.W A6 + Find non-matching bytes in [RDI] and [RSI]. + + + REPE + CMPS m16,m16 + F3 A7 + Find nonmatching words in ES:[(E)DI] and DS:[(E)SI]. + + + REPE + CMPS m32,m32 + F3 A7 + Find nonmatching doublewords in ES:[(E)DI] and DS:[(E)SI]. + + + REPE + CMPS m64,m64 + F3 REX.W A7 + Find non-matching quadwords in [RDI] and [RSI]. + + + REPE + SCAS m8 + F3 AE + Find non-AL byte starting at ES:[(E)DI]. + + + REPE + SCAS m8 + F3 REX.W AE + Find non-AL byte starting at [RDI]. + + + REPE + SCAS m16 + F3 AF + Find non-AX word starting at ES:[(E)DI]. 
+ + + REPE + SCAS m32 + F3 AF + Find non-EAX doubleword starting at ES:[(E)DI]. + + + REPE + SCAS m64 + F3 REX.W AF + Find non-RAX quadword starting at [RDI]. + + + REPNE + CMPS m8,m8 + F2 A6 + Find matching bytes in ES:[(E)DI] and DS:[(E)SI]. + + + REPNE + CMPS m8,m8 + F2 REX.W A6 + Find matching bytes in [RDI] and [RSI]. + + + REPNE + CMPS m16,m16 + F2 A7 + Find matching words in ES:[(E)DI] and DS:[(E)SI]. + + + REPNE + CMPS m32,m32 + F2 A7 + Find matching doublewords in ES:[(E)DI] and DS:[(E)SI]. + + + REPNE + CMPS m64,m64 + F2 REX.W A7 + Find matching doublewords in [RDI] and [RSI]. + + + REPNE + SCAS m8 + F2 AE + Find AL, starting at ES:[(E)DI]. + + + REPNE + SCAS m8 + F2 REX.W AE + Find AL, starting at [RDI]. + + + REPNE + SCAS m16 + F2 AF + Find AX, starting at ES:[(E)DI]. + + + REPNE + SCAS m32 + F2 AF + Find EAX, starting at ES:[(E)DI]. + + + REPNE + SCAS m64 + F2 REX.W AF + Find RAX, starting at [RDI]. + + + NA + NA + NA + NA + + + + RET--Return from Procedure. + + RET + void + C3 + Near return to calling procedure. + + + RET + void + CB + Far return to calling procedure. + + + RET + imm16 + C2 iw + Near return to calling procedure and pop imm16 bytes from stack. + + + RET + imm16 + CA iw + Far return to calling procedure and pop imm16 bytes from stack. + + + NA + NA + NA + NA + + + imm16(r) + NA + NA + NA + + + + RORX--Rotate Right Logical Without Affecting Flags. + + RORX + r32,r/m32,imm8 + VEX.LZ.F2.0F3A.W0 F0 /r ib + + BMI2 + + Rotate 32-bit r/m32 right imm8 times without affecting arithmetic flags. + + + RORX + r64,r/m64,imm8 + VEX.LZ.F2.0F3A.W1 F0 /r ib + + BMI2 + + Rotate 64-bit r/m64 right imm8 times without affecting arithmetic flags. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + ROUNDPD--Round Packed Double Precision Floating-Point Values. + + ROUNDPD + xmm1,xmm2/m128,imm8 + 66 0F 3A 09 /r ib + + SSE4_1 + + Round packed double precision floating-point values in xmm2/m128 and place the result in xmm1. The rounding mode is determined by imm8. + + + VROUNDPD + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.WIG 09 /r ib + + AVX + + Round packed double-precision floating-point values in xmm2/m128 and place the result in xmm1. The rounding mode is determined by imm8. + + + VROUNDPD + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.WIG 09 /r ib + + AVX + + Round packed double-precision floating-point values in ymm2/m256 and place the result in ymm1. The rounding mode is determined by imm8. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + + ROUNDPS--Round Packed Single Precision Floating-Point Values. + + ROUNDPS + xmm1,xmm2/m128,imm8 + 66 0F 3A 08 /r ib + + SSE4_1 + + Round packed single precision floating-point values in xmm2/m128 and place the result in xmm1. The rounding mode is determined by imm8. + + + VROUNDPS + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.WIG 08 /r ib + + AVX + + Round packed single-precision floating-point values in xmm2/m128 and place the result in xmm1. The rounding mode is determined by imm8. + + + VROUNDPS + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.WIG 08 /r ib + + AVX + + Round packed single-precision floating-point values in ymm2/m256 and place the result in ymm1. The rounding mode is determined by imm8. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + + ROUNDSD--Round Scalar Double Precision Floating-Point Values. + + ROUNDSD + xmm1,xmm2/m64,imm8 + 66 0F 3A 0B /r ib + + SSE4_1 + + Round the low packed double precision floating-point value in xmm2/m64 and place the result in xmm1. The rounding mode is determined by imm8. 
+ + + VROUNDSD + xmm1,xmm2,xmm3/m64,imm8 + VEX.NDS.LIG.66.0F3A.WIG 0B /r ib + + AVX + + Round the low packed double precision floating-point value in xmm3/m64 and place the result in xmm1. The rounding mode is determined by imm8. Upper packed double precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64]. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + ROUNDSS--Round Scalar Single Precision Floating-Point Values. + + ROUNDSS + xmm1,xmm2/m32,imm8 + 66 0F 3A 0A /r ib + + SSE4_1 + + Round the low packed single precision floating-point value in xmm2/m32 and place the result in xmm1. The rounding mode is determined by imm8. + + + VROUNDSS + xmm1,xmm2,xmm3/m32,imm8 + VEX.NDS.LIG.66.0F3A.WIG 0A /r ib + + AVX + + Round the low packed single precision floating-point value in xmm3/m32 and place the result in xmm1. The rounding mode is determined by imm8. Also, upper packed single precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + RSM--Resume from System Management Mode. + + RSM + void + 0F AA + Resume operation of interrupted program. + + + NA + NA + NA + NA + + + + RSQRTPS--Compute Reciprocals of Square Roots of Packed Single-Precision Floating-Point Values. + + RSQRTPS + xmm1,xmm2/m128 + 0F 52 /r + + SSE + + Computes the approximate reciprocals of the square roots of the packed single-precision floating-point values in xmm2/m128 and stores the results in xmm1. + + + VRSQRTPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 52 /r + + AVX + + Computes the approximate reciprocals of the square roots of packed single-precision values in xmm2/mem and stores the results in xmm1. + + + VRSQRTPS + ymm1,ymm2/m256 + VEX.256.0F.WIG 52 /r + + AVX + + Computes the approximate reciprocals of the square roots of packed single-precision values in ymm2/mem and stores the results in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + RSQRTSS--Compute Reciprocal of Square Root of Scalar Single-Precision Floating-Point Value. + + RSQRTSS + xmm1,xmm2/m32 + F3 0F 52 /r + + SSE + + Computes the approximate reciprocal of the square root of the low single-precision floating-point value in xmm2/m32 and stores the results in xmm1. + + + VRSQRTSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.LIG.F3.0F.WIG 52 /r + + AVX + + Computes the approximate reciprocal of the square root of the low single precision floating-point value in xmm3/m32 and stores the results in xmm1. Also, upper single precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SAHF--Store AH into Flags. + + SAHF + void + 9E + Loads SF, ZF, AF, PF, and CF from AH into EFLAGS register. + + + NA + NA + NA + NA + + + + SAL/SAR/SHL/SHR--Shift. + + SAL + r/m8,1 + D0 /4 + Multiply r/m8 by 2, once. + + + SAL + r/m8**,1 + REX + D0 /4 + Multiply r/m8 by 2, once. + + + SAL + r/m8,CL + D2 /4 + Multiply r/m8 by 2, CL times. + + + SAL + r/m8**,CL + REX + D2 /4 + Multiply r/m8 by 2, CL times. + + + SAL + r/m8,imm8 + C0 /4 ib + Multiply r/m8 by 2, imm8 times. + + + SAL + r/m8**,imm8 + REX + C0 /4 ib + Multiply r/m8 by 2, imm8 times. + + + SAL + r/m16,1 + D1 /4 + Multiply r/m16 by 2, once. + + + SAL + r/m16,CL + D3 /4 + Multiply r/m16 by 2, CL times. + + + SAL + r/m16,imm8 + C1 /4 ib + Multiply r/m16 by 2, imm8 times. 
+ + + SAL + r/m32,1 + D1 /4 + Multiply r/m32 by 2, once. + + + SAL + r/m64,1 + REX.W + D1 /4 + Multiply r/m64 by 2, once. + + + SAL + r/m32,CL + D3 /4 + Multiply r/m32 by 2, CL times. + + + SAL + r/m64,CL + REX.W + D3 /4 + Multiply r/m64 by 2, CL times. + + + SAL + r/m32,imm8 + C1 /4 ib + Multiply r/m32 by 2, imm8 times. + + + SAL + r/m64,imm8 + REX.W + C1 /4 ib + Multiply r/m64 by 2, imm8 times. + + + SAR + r/m8,1 + D0 /7 + Signed divide* r/m8 by 2, once. + + + SAR + r/m8**,1 + REX + D0 /7 + Signed divide* r/m8 by 2, once. + + + SAR + r/m8,CL + D2 /7 + Signed divide* r/m8 by 2, CL times. + + + SAR + r/m8**,CL + REX + D2 /7 + Signed divide* r/m8 by 2, CL times. + + + SAR + r/m8,imm8 + C0 /7 ib + Signed divide* r/m8 by 2, imm8 time. + + + SAR + r/m8**,imm8 + REX + C0 /7 ib + Signed divide* r/m8 by 2, imm8 times. + + + SAR + r/m16,1 + D1 /7 + Signed divide* r/m16 by 2, once. + + + SAR + r/m16,CL + D3 /7 + Signed divide* r/m16 by 2, CL times. + + + SAR + r/m16,imm8 + C1 /7 ib + Signed divide* r/m16 by 2, imm8 times. + + + SAR + r/m32,1 + D1 /7 + Signed divide* r/m32 by 2, once. + + + SAR + r/m64,1 + REX.W + D1 /7 + Signed divide* r/m64 by 2, once. + + + SAR + r/m32,CL + D3 /7 + Signed divide* r/m32 by 2, CL times. + + + SAR + r/m64,CL + REX.W + D3 /7 + Signed divide* r/m64 by 2, CL times. + + + SAR + r/m32,imm8 + C1 /7 ib + Signed divide* r/m32 by 2, imm8 times. + + + SAR + r/m64,imm8 + REX.W + C1 /7 ib + Signed divide* r/m64 by 2, imm8 times. + + + SHL + r/m8,1 + D0 /4 + Multiply r/m8 by 2, once. + + + SHL + r/m8**,1 + REX + D0 /4 + Multiply r/m8 by 2, once. + + + SHL + r/m8,CL + D2 /4 + Multiply r/m8 by 2, CL times. + + + SHL + r/m8**,CL + REX + D2 /4 + Multiply r/m8 by 2, CL times. + + + SHL + r/m8,imm8 + C0 /4 ib + Multiply r/m8 by 2, imm8 times. + + + SHL + r/m8**,imm8 + REX + C0 /4 ib + Multiply r/m8 by 2, imm8 times. + + + SHL + r/m16,1 + D1 /4 + Multiply r/m16 by 2, once. + + + SHL + r/m16,CL + D3 /4 + Multiply r/m16 by 2, CL times. + + + SHL + r/m16,imm8 + C1 /4 ib + Multiply r/m16 by 2, imm8 times. + + + SHL + r/m32,1 + D1 /4 + Multiply r/m32 by 2, once. + + + SHL + r/m64,1 + REX.W + D1 /4 + Multiply r/m64 by 2, once. + + + SHL + r/m32,CL + D3 /4 + Multiply r/m32 by 2, CL times. + + + SHL + r/m64,CL + REX.W + D3 /4 + Multiply r/m64 by 2, CL times. + + + SHL + r/m32,imm8 + C1 /4 ib + Multiply r/m32 by 2, imm8 times. + + + SHL + r/m64,imm8 + REX.W + C1 /4 ib + Multiply r/m64 by 2, imm8 times. + + + SHR + r/m8,1 + D0 /5 + Unsigned divide r/m8 by 2, once. + + + SHR + r/m8**,1 + REX + D0 /5 + Unsigned divide r/m8 by 2, once. + + + SHR + r/m8,CL + D2 /5 + Unsigned divide r/m8 by 2, CL times. + + + SHR + r/m8**,CL + REX + D2 /5 + Unsigned divide r/m8 by 2, CL times. + + + SHR + r/m8,imm8 + C0 /5 ib + Unsigned divide r/m8 by 2, imm8 times. + + + SHR + r/m8**,imm8 + REX + C0 /5 ib + Unsigned divide r/m8 by 2, imm8 times. + + + SHR + r/m16,1 + D1 /5 + Unsigned divide r/m16 by 2, once. + + + SHR + r/m16,CL + D3 /5 + Unsigned divide r/m16 by 2, CL times. + + + SHR + r/m16,imm8 + C1 /5 ib + Unsigned divide r/m16 by 2, imm8 times. + + + SHR + r/m32,1 + D1 /5 + Unsigned divide r/m32 by 2, once. + + + SHR + r/m64,1 + REX.W + D1 /5 + Unsigned divide r/m64 by 2, once. + + + SHR + r/m32,CL + D3 /5 + Unsigned divide r/m32 by 2, CL times. + + + SHR + r/m64,CL + REX.W + D3 /5 + Unsigned divide r/m64 by 2, CL times. + + + SHR + r/m32,imm8 + C1 /5 ib + Unsigned divide r/m32 by 2, imm8 times. + + + SHR + r/m64,imm8 + REX.W + C1 /5 ib + Unsigned divide r/m64 by 2, imm8 times. 
+ + + ModRM:r/m(r,w) + 1 + NA + NA + + + ModRM:r/m(r,w) + CL + NA + NA + + + ModRM:r/m(r,w) + imm8(r) + NA + NA + + + + SARX/SHLX/SHRX--Shift Without Affecting Flags. + + SARX + r32a,r/m32,r32b + VEX.NDS.LZ.F3.0F38.W0 F7 /r + + BMI2 + + Shift r/m32 arithmetically right with count specified in r32b. + + + SHLX + r32a,r/m32,r32b + VEX.NDS.LZ.66.0F38.W0 F7 /r + + BMI2 + + Shift r/m32 logically left with count specified in r32b. + + + SHRX + r32a,r/m32,r32b + VEX.NDS.LZ.F2.0F38.W0 F7 /r + + BMI2 + + Shift r/m32 logically right with count specified in r32b. + + + SARX + r64a,r/m64,r64b + VEX.NDS.LZ.F3.0F38.W1 F7 /r + + BMI2 + + Shift r/m64 arithmetically right with count specified in r64b. + + + SHLX + r64a,r/m64,r64b + VEX.NDS.LZ.66.0F38.W1 F7 /r + + BMI2 + + Shift r/m64 logically left with count specified in r64b. + + + SHRX + r64a,r/m64,r64b + VEX.NDS.LZ.F2.0F38.W1 F7 /r + + BMI2 + + Shift r/m64 logically right with count specified in r64b. + + + ModRM:reg(w) + ModRM:r/m(r) + VEX.vvvv(r) + NA + + + + SBB--Integer Subtraction with Borrow. + + SBB + AL,imm8 + 1C ib + Subtract with borrow imm8 from AL. + + + SBB + AX,imm16 + 1D iw + Subtract with borrow imm16 from AX. + + + SBB + EAX,imm32 + 1D id + Subtract with borrow imm32 from EAX. + + + SBB + RAX,imm32 + REX.W + 1D id + Subtract with borrow sign-extended imm.32 to 64-bits from RAX. + + + SBB + r/m8,imm8 + 80 /3 ib + Subtract with borrow imm8 from r/m8. + + + SBB + r/m8*,imm8 + REX + 80 /3 ib + Subtract with borrow imm8 from r/m8. + + + SBB + r/m16,imm16 + 81 /3 iw + Subtract with borrow imm16 from r/m16. + + + SBB + r/m32,imm32 + 81 /3 id + Subtract with borrow imm32 from r/m32. + + + SBB + r/m64,imm32 + REX.W + 81 /3 id + Subtract with borrow sign-extended imm32 to 64-bits from r/m64. + + + SBB + r/m16,imm8 + 83 /3 ib + Subtract with borrow sign-extended imm8 from r/m16. + + + SBB + r/m32,imm8 + 83 /3 ib + Subtract with borrow sign-extended imm8 from r/m32. + + + SBB + r/m64,imm8 + REX.W + 83 /3 ib + Subtract with borrow sign-extended imm8 from r/m64. + + + SBB + r/m8,r8 + 18 /r + Subtract with borrow r8 from r/m8. + + + SBB + r/m8*,r8 + REX + 18 /r + Subtract with borrow r8 from r/m8. + + + SBB + r/m16,r16 + 19 /r + Subtract with borrow r16 from r/m16. + + + SBB + r/m32,r32 + 19 /r + Subtract with borrow r32 from r/m32. + + + SBB + r/m64,r64 + REX.W + 19 /r + Subtract with borrow r64 from r/m64. + + + SBB + r8,r/m8 + 1A /r + Subtract with borrow r/m8 from r8. + + + SBB + r8*,r/m8* + REX + 1A /r + Subtract with borrow r/m8 from r8. + + + SBB + r16,r/m16 + 1B /r + Subtract with borrow r/m16 from r16. + + + SBB + r32,r/m32 + 1B /r + Subtract with borrow r/m32 from r32. + + + SBB + r64,r/m64 + REX.W + 1B /r + Subtract with borrow r/m64 from r64. + + + AL/AX/EAX/RAX + imm8(r)/16/32 + NA + NA + + + ModRM:r/m(w) + imm8(r)/16/32 + NA + NA + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + SCAS/SCASB/SCASW/SCASD--Scan String. + + SCAS + m8 + AE + Compare AL with byte at ES:(E)DI or RDI, then set status flags.*. + + + SCAS + m16 + AF + Compare AX with word at ES:(E)DI or RDI, then set status flags.*. + + + SCAS + m32 + AF + Compare EAX with doubleword at ES(E)DI or RDI then set status flags.*. + + + SCAS + m64 + REX.W + AF + Compare RAX with quadword at RDI or EDI then set status flags. + + + SCASB + void + AE + Compare AL with byte at ES:(E)DI or RDI then set status flags.*. + + + SCASW + void + AF + Compare AX with word at ES:(E)DI or RDI then set status flags.*. 
+ + + SCASD + void + AF + Compare EAX with doubleword at ES:(E)DI or RDI then set status flags.*. + + + SCASQ + void + REX.W + AF + Compare RAX with quadword at RDI or EDI then set status flags. + + + NA + NA + NA + NA + + + + SETcc--Set Byte on Condition. + + SETA + r/m8 + 0F 97 + Set byte if above (CF=0 and ZF=0). + + + SETA + r/m8* + REX + 0F 97 + Set byte if above (CF=0 and ZF=0). + + + SETAE + r/m8 + 0F 93 + Set byte if above or equal (CF=0). + + + SETAE + r/m8* + REX + 0F 93 + Set byte if above or equal (CF=0). + + + SETB + r/m8 + 0F 92 + Set byte if below (CF=1). + + + SETB + r/m8* + REX + 0F 92 + Set byte if below (CF=1). + + + SETBE + r/m8 + 0F 96 + Set byte if below or equal (CF=1 or ZF=1). + + + SETBE + r/m8* + REX + 0F 96 + Set byte if below or equal (CF=1 or ZF=1). + + + SETC + r/m8 + 0F 92 + Set byte if carry (CF=1). + + + SETC + r/m8* + REX + 0F 92 + Set byte if carry (CF=1). + + + SETE + r/m8 + 0F 94 + Set byte if equal (ZF=1). + + + SETE + r/m8* + REX + 0F 94 + Set byte if equal (ZF=1). + + + SETG + r/m8 + 0F 9F + Set byte if greater (ZF=0 and SF=OF). + + + SETG + r/m8* + REX + 0F 9F + Set byte if greater (ZF=0 and SF=OF). + + + SETGE + r/m8 + 0F 9D + Set byte if greater or equal (SF=OF). + + + SETGE + r/m8* + REX + 0F 9D + Set byte if greater or equal (SF=OF). + + + SETL + r/m8 + 0F 9C + Set byte if less (SF != OF). + + + SETL + r/m8* + REX + 0F 9C + Set byte if less (SF != OF). + + + SETLE + r/m8 + 0F 9E + Set byte if less or equal (ZF=1 or SF != OF). + + + SETLE + r/m8* + REX + 0F 9E + Set byte if less or equal (ZF=1 or SF != OF). + + + SETNA + r/m8 + 0F 96 + Set byte if not above (CF=1 or ZF=1). + + + SETNA + r/m8* + REX + 0F 96 + Set byte if not above (CF=1 or ZF=1). + + + SETNAE + r/m8 + 0F 92 + Set byte if not above or equal (CF=1). + + + SETNAE + r/m8* + REX + 0F 92 + Set byte if not above or equal (CF=1). + + + SETNB + r/m8 + 0F 93 + Set byte if not below (CF=0). + + + SETNB + r/m8* + REX + 0F 93 + Set byte if not below (CF=0). + + + SETNBE + r/m8 + 0F 97 + Set byte if not below or equal (CF=0 and ZF=0). + + + SETNBE + r/m8* + REX + 0F 97 + Set byte if not below or equal (CF=0 and ZF=0). + + + SETNC + r/m8 + 0F 93 + Set byte if not carry (CF=0). + + + SETNC + r/m8* + REX + 0F 93 + Set byte if not carry (CF=0). + + + SETNE + r/m8 + 0F 95 + Set byte if not equal (ZF=0). + + + SETNE + r/m8* + REX + 0F 95 + Set byte if not equal (ZF=0). + + + SETNG + r/m8 + 0F 9E + Set byte if not greater (ZF=1 or SF != OF). + + + SETNG + r/m8* + REX + 0F 9E + Set byte if not greater (ZF=1 or SF != OF). + + + SETNGE + r/m8 + 0F 9C + Set byte if not greater or equal (SF != OF). + + + SETNGE + r/m8* + REX + 0F 9C + Set byte if not greater or equal (SF != OF). + + + SETNL + r/m8 + 0F 9D + Set byte if not less (SF=OF). + + + SETNL + r/m8* + REX + 0F 9D + Set byte if not less (SF=OF). + + + SETNLE + r/m8 + 0F 9F + Set byte if not less or equal (ZF=0 and SF=OF). + + + SETNLE + r/m8* + REX + 0F 9F + Set byte if not less or equal (ZF=0 and SF=OF). + + + SETNO + r/m8 + 0F 91 + Set byte if not overflow (OF=0). + + + SETNO + r/m8* + REX + 0F 91 + Set byte if not overflow (OF=0). + + + SETNP + r/m8 + 0F 9B + Set byte if not parity (PF=0). + + + SETNP + r/m8* + REX + 0F 9B + Set byte if not parity (PF=0). + + + SETNS + r/m8 + 0F 99 + Set byte if not sign (SF=0). + + + SETNS + r/m8* + REX + 0F 99 + Set byte if not sign (SF=0). + + + SETNZ + r/m8 + 0F 95 + Set byte if not zero (ZF=0). + + + SETNZ + r/m8* + REX + 0F 95 + Set byte if not zero (ZF=0). 
+ + + SETO + r/m8 + 0F 90 + Set byte if overflow (OF=1). + + + SETO + r/m8* + REX + 0F 90 + Set byte if overflow (OF=1). + + + SETP + r/m8 + 0F 9A + Set byte if parity (PF=1). + + + SETP + r/m8* + REX + 0F 9A + Set byte if parity (PF=1). + + + SETPE + r/m8 + 0F 9A + Set byte if parity even (PF=1). + + + SETPE + r/m8* + REX + 0F 9A + Set byte if parity even (PF=1). + + + SETPO + r/m8 + 0F 9B + Set byte if parity odd (PF=0). + + + SETPO + r/m8* + REX + 0F 9B + Set byte if parity odd (PF=0). + + + SETS + r/m8 + 0F 98 + Set byte if sign (SF=1). + + + SETS + r/m8* + REX + 0F 98 + Set byte if sign (SF=1). + + + SETZ + r/m8 + 0F 94 + Set byte if zero (ZF=1). + + + SETZ + r/m8* + REX + 0F 94 + Set byte if zero (ZF=1). + + + ModRM:r/m(r) + NA + NA + NA + + + + SFENCE--Store Fence. + + SFENCE + void + 0F AE F8 + Serializes store operations. + + + NA + NA + NA + NA + + + + SGDT--Store Global Descriptor Table Register. + + SGDT + m + 0F 01 /0 + Store GDTR to m. + + + ModRM:r/m(w) + NA + NA + NA + + + + SHLD--Double Precision Shift Left. + + SHLD + r/m16,r16,imm8 + 0F A4 /r ib + Shift r/m16 to left imm8 places while shifting bits from r16 in from the right. + + + SHLD + r/m16,r16,CL + 0F A5 /r + Shift r/m16 to left CL places while shifting bits from r16 in from the right. + + + SHLD + r/m32,r32,imm8 + 0F A4 /r ib + Shift r/m32 to left imm8 places while shifting bits from r32 in from the right. + + + SHLD + r/m64,r64,imm8 + REX.W + 0F A4 /r ib + Shift r/m64 to left imm8 places while shifting bits from r64 in from the right. + + + SHLD + r/m32,r32,CL + 0F A5 /r + Shift r/m32 to left CL places while shifting bits from r32 in from the right. + + + SHLD + r/m64,r64,CL + REX.W + 0F A5 /r + Shift r/m64 to left CL places while shifting bits from r64 in from the right. + + + ModRM:r/m(w) + ModRM:reg(r) + imm8(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + CL + NA + + + + SHRD--Double Precision Shift Right. + + SHRD + r/m16,r16,imm8 + 0F AC /r ib + Shift r/m16 to right imm8 places while shifting bits from r16 in from the left. + + + SHRD + r/m16,r16,CL + 0F AD /r + Shift r/m16 to right CL places while shifting bits from r16 in from the left. + + + SHRD + r/m32,r32,imm8 + 0F AC /r ib + Shift r/m32 to right imm8 places while shifting bits from r32 in from the left. + + + SHRD + r/m64,r64,imm8 + REX.W + 0F AC /r ib + Shift r/m64 to right imm8 places while shifting bits from r64 in from the left. + + + SHRD + r/m32,r32,CL + 0F AD /r + Shift r/m32 to right CL places while shifting bits from r32 in from the left. + + + SHRD + r/m64,r64,CL + REX.W + 0F AD /r + Shift r/m64 to right CL places while shifting bits from r64 in from the left. + + + ModRM:r/m(w) + ModRM:reg(r) + imm8(r) + NA + + + ModRM:r/m(w) + ModRM:reg(r) + CL + NA + + + + SHUFPD--Shuffle Packed Double-Precision Floating-Point Values. + + SHUFPD + xmm1,xmm2/m128,imm8 + 66 0F C6 /r ib + + SSE2 + + Shuffle packed double-precision floatingpoint values selected by imm8 from xmm1 and xmm2/m128 to xmm1. + + + VSHUFPD + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F.WIG C6 /r ib + + AVX + + Shuffle Packed double-precision floatingpoint values selected by imm8 from xmm2 and xmm3/mem. + + + VSHUFPD + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F.WIG C6 /r ib + + AVX + + Shuffle Packed double-precision floatingpoint values selected by imm8 from ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + SHUFPS--Shuffle Packed Single-Precision Floating-Point Values. 
+ + SHUFPS + xmm1,xmm2/m128,imm8 + 0F C6 /r ib + + SSE + + Shuffle packed single-precision floating-point values selected by imm8 from xmm1 and xmm1/m128 to xmm1. + + + VSHUFPS + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.0F.WIG C6 /r ib + + AVX + + Shuffle Packed single-precision floating-point values selected by imm8 from xmm2 and xmm3/mem. + + + VSHUFPS + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.0F.WIG C6 /r ib + + AVX + + Shuffle Packed single-precision floating-point values selected by imm8 from ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + imm8(r) + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + SIDT--Store Interrupt Descriptor Table Register. + + SIDT + m + 0F 01 /1 + Store IDTR to m. + + + ModRM:r/m(w) + NA + NA + NA + + + + SLDT--Store Local Descriptor Table Register. + + SLDT + r/m16 + 0F 00 /0 + Stores segment selector from LDTR in r/m16. + + + SLDT + r64/m16 + REX.W + 0F 00 /0 + Stores segment selector from LDTR in r64/m16. + + + ModRM:r/m(w) + NA + NA + NA + + + + SMSW--Store Machine Status Word. + + SMSW + r/m16 + 0F 01 /4 + Store machine status word to r/m16. + + + SMSW + r32/m16 + 0F 01 /4 + Store machine status word in low-order 16 bits of r32/m16; high-order 16 bits of r32 are undefined. + + + SMSW + r64/m16 + REX.W + 0F 01 /4 + Store machine status word in low-order 16 bits of r64/m16; high-order 16 bits of r32 are undefined. + + + ModRM:r/m(w) + NA + NA + NA + + + + SQRTPD--Compute Square Roots of Packed Double-Precision Floating-Point Values. + + SQRTPD + xmm1,xmm2/m128 + 66 0F 51 /r + + SSE2 + + Computes square roots of the packed doubleprecision floating-point values in xmm2/m128 and stores the results in xmm1. + + + VSQRTPD + xmm1,xmm2/m128 + VEX.128.66.0F.WIG 51 /r + + AVX + + Computes Square Roots of the packed doubleprecision floating-point values in xmm2/m128 and stores the result in xmm1. + + + VSQRTPD + ymm1,ymm2/m256 + VEX.256.66.0F.WIG 51/r + + AVX + + Computes Square Roots of the packed doubleprecision floating-point values in ymm2/m256 and stores the result in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + SQRTPS--Compute Square Roots of Packed Single-Precision Floating-Point Values. + + SQRTPS + xmm1,xmm2/m128 + 0F 51 /r + + SSE + + Computes square roots of the packed singleprecision floating-point values in xmm2/m128 and stores the results in xmm1. + + + VSQRTPS + xmm1,xmm2/m128 + VEX.128.0F.WIG 51 /r + + AVX + + Computes Square Roots of the packed singleprecision floating-point values in xmm2/m128 and stores the result in xmm1. + + + VSQRTPS + ymm1,ymm2/m256 + VEX.256.0F.WIG 51/r + + AVX + + Computes Square Roots of the packed singleprecision floating-point values in ymm2/m256 and stores the result in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + SQRTSD--Compute Square Root of Scalar Double-Precision Floating-Point Value. + + SQRTSD + xmm1,xmm2/m64 + F2 0F 51 /r + + SSE2 + + Computes square root of the low doubleprecision floating-point value in xmm2/m64 and stores the results in xmm1. + + + VSQRTSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.LIG.F2.0F.WIG 51/r + + AVX + + Computes square root of the low doubleprecision floating point value in xmm3/m64 and stores the results in xmm2. Also, upper double precision floating-point value (bits[127:64]) from xmm2 are copied to xmm1[127:64]. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SQRTSS--Compute Square Root of Scalar Single-Precision Floating-Point Value. 
+ + SQRTSS + xmm1,xmm2/m32 + F3 0F 51 /r + + SSE + + Computes square root of the low singleprecision floating-point value in xmm2/m32 and stores the results in xmm1. + + + VSQRTSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.LIG.F3.0F.WIG 51/r + + AVX + + Computes square root of the low singleprecision floating-point value in xmm3/m32 and stores the results in xmm1. Also, upper single precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + STAC--Set AC Flag in EFLAGS Register. + + STAC + void + 0F 01 CB + Set the AC flag in the EFLAGS register. + + + NA + NA + NA + NA + + + + STC--Set Carry Flag. + + STC + void + F9 + Set CF flag. + + + NA + NA + NA + NA + + + + STD--Set Direction Flag. + + STD + void + FD + Set DF flag. + + + NA + NA + NA + NA + + + + STI--Set Interrupt Flag. + + STI + void + FB + Set interrupt flag; external, maskable interrupts enabled at the end of the next instruction. + + + NA + NA + NA + NA + + + + STMXCSR--Store MXCSR Register State. + + STMXCSR + m32 + 0F AE /3 + + SSE + + Store contents of MXCSR register to m32. + + + VSTMXCSR + m32 + VEX.LZ.0F.WIG AE /3 + + AVX + + Store contents of MXCSR register to m32. + + + ModRM:r/m(w) + NA + NA + NA + + + + STOS/STOSB/STOSW/STOSD/STOSQ--Store String. + + STOS + m8 + AA + For legacy mode, store AL at address ES:(E)DI; For 64-bit mode store AL at address RDI or EDI. + + + STOS + m16 + AB + For legacy mode, store AX at address ES:(E)DI; For 64-bit mode store AX at address RDI or EDI. + + + STOS + m32 + AB + For legacy mode, store EAX at address ES:(E)DI; For 64-bit mode store EAX at address RDI or EDI. + + + STOS + m64 + REX.W + AB + Store RAX at address RDI or EDI. + + + STOSB + void + AA + For legacy mode, store AL at address ES:(E)DI; For 64-bit mode store AL at address RDI or EDI. + + + STOSW + void + AB + For legacy mode, store AX at address ES:(E)DI; For 64-bit mode store AX at address RDI or EDI. + + + STOSD + void + AB + For legacy mode, store EAX at address ES:(E)DI; For 64-bit mode store EAX at address RDI or EDI. + + + STOSQ + void + REX.W + AB + Store RAX at address RDI or EDI. + + + NA + NA + NA + NA + + + + STR--Store Task Register. + + STR + r/m16 + 0F 00 /1 + Stores segment selector from TR in r/m16. + + + ModRM:r/m(w) + NA + NA + NA + + + + SUB--Subtract. + + SUB + AL,imm8 + 2C ib + Subtract imm8 from AL. + + + SUB + AX,imm16 + 2D iw + Subtract imm16 from AX. + + + SUB + EAX,imm32 + 2D id + Subtract imm32 from EAX. + + + SUB + RAX,imm32 + REX.W + 2D id + Subtract imm32 sign-extended to 64-bits from RAX. + + + SUB + r/m8,imm8 + 80 /5 ib + Subtract imm8 from r/m8. + + + SUB + r/m8*,imm8 + REX + 80 /5 ib + Subtract imm8 from r/m8. + + + SUB + r/m16,imm16 + 81 /5 iw + Subtract imm16 from r/m16. + + + SUB + r/m32,imm32 + 81 /5 id + Subtract imm32 from r/m32. + + + SUB + r/m64,imm32 + REX.W + 81 /5 id + Subtract imm32 sign-extended to 64-bits from r/m64. + + + SUB + r/m16,imm8 + 83 /5 ib + Subtract sign-extended imm8 from r/m16. + + + SUB + r/m32,imm8 + 83 /5 ib + Subtract sign-extended imm8 from r/m32. + + + SUB + r/m64,imm8 + REX.W + 83 /5 ib + Subtract sign-extended imm8 from r/m64. + + + SUB + r/m8,r8 + 28 /r + Subtract r8 from r/m8. + + + SUB + r/m8*,r8* + REX + 28 /r + Subtract r8 from r/m8. + + + SUB + r/m16,r16 + 29 /r + Subtract r16 from r/m16. + + + SUB + r/m32,r32 + 29 /r + Subtract r32 from r/m32. + + + SUB + r/m64,r64 + REX.W + 29 /r + Subtract r64 from r/m64. 
+ + + SUB + r8,r/m8 + 2A /r + Subtract r/m8 from r8. + + + SUB + r8*,r/m8* + REX + 2A /r + Subtract r/m8 from r8. + + + SUB + r16,r/m16 + 2B /r + Subtract r/m16 from r16. + + + SUB + r32,r/m32 + 2B /r + Subtract r/m32 from r32. + + + SUB + r64,r/m64 + REX.W + 2B /r + Subtract r/m64 from r64. + + + AL/AX/EAX/RAX + imm8(r)/26/32 + NA + NA + + + ModRM:r/m(r,w) + imm8(r)/26/32 + NA + NA + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + SUBPD--Subtract Packed Double-Precision Floating-Point Values. + + SUBPD + xmm1,xmm2/m128 + 66 0F 5C /r + + SSE2 + + Subtract packed double-precision floatingpoint values in xmm2/m128 from xmm1. + + + VSUBPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 5C /r + + AVX + + Subtract packed double-precision floatingpoint values in xmm3/mem from xmm2 and stores result in xmm1. + + + VSUBPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 5C /r + + AVX + + Subtract packed double-precision floatingpoint values in ymm3/mem from ymm2 and stores result in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SUBPS--Subtract Packed Single-Precision Floating-Point Values. + + SUBPS + xmm1 xmm2/m128 + 0F 5C /r + + SSE + + Subtract packed single-precision floating-point values in xmm2/mem from xmm1. + + + VSUBPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 5C /r + + AVX + + Subtract packed single-precision floating-point values in xmm3/mem from xmm2 and stores result in xmm1. + + + VSUBPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 5C /r + + AVX + + Subtract packed single-precision floating-point values in ymm3/mem from ymm2 and stores result in ymm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SUBSD--Subtract Scalar Double-Precision Floating-Point Values. + + SUBSD + xmm1,xmm2/m64 + F2 0F 5C /r + + SSE2 + + Subtracts the low double-precision floatingpoint values in xmm2/mem64 from xmm1. + + + VSUBSD + xmm1,xmm2,xmm3/m64 + VEX.NDS.LIG.F2.0F.WIG 5C /r + + AVX + + Subtract the low double-precision floatingpoint value in xmm3/mem from xmm2 and store the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SUBSS--Subtract Scalar Single-Precision Floating-Point Values. + + SUBSS + xmm1,xmm2/m32 + F3 0F 5C /r + + SSE + + Subtract the lower single-precision floatingpoint values in xmm2/m32 from xmm1. + + + VSUBSS + xmm1,xmm2,xmm3/m32 + VEX.NDS.LIG.F3.0F.WIG 5C /r + + AVX + + Subtract the low single-precision floatingpoint value in xmm3/mem from xmm2 and store the result in xmm1. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + SWAPGS--Swap GS Base Register. + + SWAPGS + void + 0F 01 F8 + Exchanges the current GS base register value with the value contained in MSR address C0000102H. + + + NA + NA + NA + NA + + + + SYSCALL--Fast System Call. + + SYSCALL + void + 0F 05 + Fast call to privilege level 0 system procedures. + + + NA + NA + NA + NA + + + + SYSENTER--Fast System Call. + + SYSENTER + void + 0F 34 + Fast call to privilege level 0 system procedures. + + + NA + NA + NA + NA + + + + SYSEXIT--Fast Return from Fast System Call. + + SYSEXIT + void + 0F 35 + Fast return to privilege level 3 user code. + + + SYSEXIT + void + REX.W + 0F 35 + Fast return to 64-bit mode privilege level 3 user code. + + + NA + NA + NA + NA + + + + SYSRET--Return From Fast System Call. 
+ + SYSRET + void + 0F 07 + Return to compatibility mode from fast system call. + + + SYSRET + void + REX.W + 0F 07 + Return to 64-bit mode from fast system call. + + + NA + NA + NA + NA + + + + TEST--Logical Compare. + + TEST + AL,imm8 + A8 ib + AND imm8 with AL; set SF, ZF, PF according to result. + + + TEST + AX,imm16 + A9 iw + AND imm16 with AX; set SF, ZF, PF according to result. + + + TEST + EAX,imm32 + A9 id + AND imm32 with EAX; set SF, ZF, PF according to result. + + + TEST + RAX,imm32 + REX.W + A9 id + AND imm32 sign-extended to 64-bits with RAX; set SF, ZF, PF according to result. + + + TEST + r/m8,imm8 + F6 /0 ib + AND imm8 with r/m8; set SF, ZF, PF according to result. + + + TEST + r/m8*,imm8 + REX + F6 /0 ib + AND imm8 with r/m8; set SF, ZF, PF according to result. + + + TEST + r/m16,imm16 + F7 /0 iw + AND imm16 with r/m16; set SF, ZF, PF according to result. + + + TEST + r/m32,imm32 + F7 /0 id + AND imm32 with r/m32; set SF, ZF, PF according to result. + + + TEST + r/m64,imm32 + REX.W + F7 /0 id + AND imm32 sign-extended to 64-bits with r/m64; set SF, ZF, PF according to result. + + + TEST + r/m8,r8 + 84 /r + AND r8 with r/m8; set SF, ZF, PF according to result. + + + TEST + r/m8*,r8* + REX + 84 /r + AND r8 with r/m8; set SF, ZF, PF according to result. + + + TEST + r/m16,r16 + 85 /r + AND r16 with r/m16; set SF, ZF, PF according to result. + + + TEST + r/m32,r32 + 85 /r + AND r32 with r/m32; set SF, ZF, PF according to result. + + + TEST + r/m64,r64 + REX.W + 85 /r + AND r64 with r/m64; set SF, ZF, PF according to result. + + + AL/AX/EAX/RAX + imm8(r)/16/32 + NA + NA + + + ModRM:r/m(r) + imm8(r)/16/32 + NA + NA + + + ModRM:r/m(r) + ModRM:reg(r) + NA + NA + + + + TZCNT--Count the Number of Trailing Zero Bits. + + TZCNT + r16,r/m16 + F3 0F BC /r + + BMI1 + + Count the number of trailing zero bits in r/m16, return result in r16. + + + TZCNT + r32,r/m32 + F3 0F BC /r + + BMI1 + + Count the number of trailing zero bits in r/m32, return result in r32. + + + TZCNT + r64,r/m64 + F3 REX.W 0F BC /r + + BMI1 + + Count the number of trailing zero bits in r/m64, return result in r64. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + UCOMISD--Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS. + + UCOMISD + xmm1,xmm2/m64 + 66 0F 2E /r + + SSE2 + + Compares (unordered) the low doubleprecision floating-point values in xmm1 and xmm2/m64 and set the EFLAGS accordingly. + + + VUCOMISD + xmm1,xmm2/m64 + VEX.LIG.66.0F.WIG 2E /r + + AVX + + Compare low double precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + + UCOMISS--Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS. + + UCOMISS + xmm1,xmm2/m32 + 0F 2E /r + + SSE + + Compare lower single-precision floating-point value in xmm1 register with lower singleprecision floating-point value in xmm2/mem and set the status flags accordingly. + + + VUCOMISS + xmm1,xmm2/m32 + VEX.LIG.0F.WIG 2E /r + + AVX + + Compare low single precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + + UD2--Undefined Instruction. + + UD2 + void + 0F 0B + Raise invalid opcode exception. + + + NA + NA + NA + NA + + + + UNPCKHPD--Unpack and Interleave High Packed Double-Precision Floating-Point Values. 
+ + UNPCKHPD + xmm1,xmm2/m128 + 66 0F 15 /r + + SSE2 + + Unpacks and Interleaves double-precision floating-point values from high quadwords of xmm1 and xmm2/m128. + + + VUNPCKHPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves double precision floating-point values from high quadwords of xmm2 and xmm3/m128. + + + VUNPCKHPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves double precision floating-point values from high quadwords of ymm2 and ymm3/m256. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UNPCKHPS--Unpack and Interleave High Packed Single-Precision Floating-Point Values. + + UNPCKHPS + xmm1,xmm2/m128 + 0F 15 /r + + SSE + + Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm1 and xmm2/mem into xmm1. + + + VUNPCKHPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm2 and xmm3/m128. + + + VUNPCKHPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 15 /r + + AVX + + Unpacks and Interleaves single-precision floating-point values from high quadwords of ymm2 and ymm3/m256. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UNPCKLPD--Unpack and Interleave Low Packed Double-Precision Floating-Point Values. + + UNPCKLPD + xmm1,xmm2/m128 + 66 0F 14 /r + + SSE2 + + Unpacks and Interleaves double-precision floating-point values from low quadwords of xmm1 and xmm2/m128. + + + VUNPCKLPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 14 /r + + AVX + + Unpacks and Interleaves double precision floating-point values low high quadwords of xmm2 and xmm3/m128. + + + VUNPCKLPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 14 /r + + AVX + + Unpacks and Interleaves double precision floating-point values low high quadwords of ymm2 and ymm3/m256. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + UNPCKLPS--Unpack and Interleave Low Packed Single-Precision Floating-Point Values. + + UNPCKLPS + xmm1,xmm2/m128 + 0F 14 /r + + SSE + + Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm1 and xmm2/mem into xmm1. + + + VUNPCKLPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 14 /r + + AVX + + Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm2 and xmm3/m128. + + + VUNPCKLPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 14 /r + + AVX + + Unpacks and Interleaves single-precision floating-point values from low quadwords of ymm2 and ymm3/m256. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VBROADCAST--Broadcast Floating-Point Data. + + VBROADCASTSS + xmm1,m32 + VEX.128.66.0F38.W0 18 /r + + AVX + + Broadcast single-precision floating-point element in mem to four locations in xmm1. + + + VBROADCASTSS + ymm1,m32 + VEX.256.66.0F38.W0 18 /r + + AVX + + Broadcast single-precision floating-point element in mem to eight locations in ymm1. + + + VBROADCASTSD + ymm1,m64 + VEX.256.66.0F38.W0 19 /r + + AVX + + Broadcast double-precision floating-point element in mem to four locations in ymm1. + + + VBROADCASTF128 + ymm1,m128 + VEX.256.66.0F38.W0 1A /r + + AVX + + Broadcast 128 bits of floating-point data in mem to low and high 128-bits in ymm1. 
+ + + VBROADCASTSS + xmm1,xmm2 + VEX.128.66.0F38.W0 18/r + + AVX2 + + Broadcast the low single-precision floatingpoint element in the source operand to four locations in xmm1. + + + VBROADCASTSS + ymm1,xmm2 + VEX.256.66.0F38.W0 18 /r + + AVX2 + + Broadcast low single-precision floating-point element in the source operand to eight locations in ymm1. + + + VBROADCASTSD + ymm1,xmm2 + VEX.256.66.0F38.W0 19 /r + + AVX2 + + Broadcast low double-precision floating-point element in the source operand to four locations in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPH2PS--Convert 16-bit FP Values to Single-Precision FP Values. + + VCVTPH2PS + ymm1,xmm2/m128 + VEX.256.66.0F38.W0 13 /r + + F16C + + Convert eight packed half precision (16-bit) floating-point values in xmm2/m128 to packed single-precision floating-point value in ymm1. + + + VCVTPH2PS + xmm1,xmm2/m64 + VEX.128.66.0F38.W0 13 /r + + F16C + + Convert four packed half precision (16-bit) floating-point values in xmm2/m64 to packed single-precision floating-point value in xmm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VCVTPS2PH--Convert Single-Precision FP value to 16-bit FP value. + + VCVTPS2PH + xmm1/m128,ymm2,imm8 + VEX.256.66.0F3A.W0 1D /r ib + + F16C + + Convert eight packed single-precision floating-point value in ymm2 to packed half-precision (16-bit) floating-point value in xmm1/mem. Imm8 provides rounding controls. + + + VCVTPS2PH + xmm1/m64,xmm2,imm8 + VEX.128.66.0F3A.W0.1D /r ib + + F16C + + Convert four packed single-precision floating-point value in xmm2 to packed halfprecision (16-bit) floating-point value in xmm1/mem. Imm8 provides rounding controls. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VERR/VERW--Verify a Segment for Reading or Writing. + + VERR + r/m16 + 0F 00 /4 + Set ZF=1 if segment specified with r/m16 can be read. + + + VERW + r/m16 + 0F 00 /5 + Set ZF=1 if segment specified with r/m16 can be written. + + + ModRM:r/m(r) + NA + NA + NA + + + + VEXTRACTF128--Extract Packed Floating-Point Values. + + VEXTRACTF128 + xmm1/m128,ymm2,imm8 + VEX.256.66.0F3A.W0 19 /r ib + + AVX + + Extract 128 bits of packed floating-point values from ymm2 and store results in xmm1/mem. + + + ModRM:r/m(w) + ModRM:reg(r) + NA + NA + + + + VEXTRACTI128--Extract packed Integer Values. + + VEXTRACTI128 + xmm1/m128,ymm2,imm8 + VEX.256.66.0F3A.W0 39 /r ib + + AVX2 + + Extract 128 bits of integer data from ymm2 and store results in xmm1/mem. + + + ModRM:r/m(w) + ModRM:reg(r) + Imm8 + NA + + + + VFMADD132PD/VFMADD213PD/VFMADD231PD--Fused Multiply-Add of Packed Double-Precision Floating-Point Values. + + VFMADD132PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 98 /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm2/mem, add to xmm1 and put result in xmm0. + + + VFMADD213PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 A8 /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm1, add to xmm2/mem and put result in xmm0. + + + VFMADD231PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 B8 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2/mem, add to xmm0 and put result in xmm0. + + + VFMADD132PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 98 /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm2/mem, add to ymm1 and put result in ymm0. 
+ + + VFMADD213PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 A8 /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm1, add to ymm2/mem and put result in ymm0. + + + VFMADD231PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 B8 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2/mem, add to ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADD132PS/VFMADD213PS/VFMADD231PS--Fused Multiply-Add of Packed Single-Precision Floating-Point Values. + + VFMADD132PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 98 /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm2/mem, add to xmm1 and put result in xmm0. + + + VFMADD213PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 A8 /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm1, add to xmm2/mem and put result in xmm0. + + + VFMADD231PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 B8 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2/mem, add to xmm0 and put result in xmm0. + + + VFMADD132PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 98 /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm2/mem, add to ymm1 and put result in ymm0. + + + VFMADD213PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 A8 /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm1, add to ymm2/mem and put result in ymm0. + + + VFMADD231PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 B8 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2/mem, add to ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADD132SD/VFMADD213SD/VFMADD231SD--Fused Multiply-Add of Scalar Double-Precision Floating-Point Values. + + VFMADD132SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 99 /r + + FMA + + Multiply scalar double-precision floating-point value from xmm0 and xmm2/mem, add to xmm1 and put result in xmm0. + + + VFMADD213SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 A9 /r + + FMA + + Multiply scalar double-precision floating-point value from xmm0 and xmm1, add to xmm2/mem and put result in xmm0. + + + VFMADD231SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 B9 /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2/mem, add to xmm0 and put result in xmm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADD132SS/VFMADD213SS/VFMADD231SS--Fused Multiply-Add of Scalar Single-Precision Floating-Point Values. + + VFMADD132SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 99 /r + + FMA + + Multiply scalar single-precision floating-point value from xmm0 and xmm2/mem, add to xmm1 and put result in xmm0. + + + VFMADD213SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 A9 /r + + FMA + + Multiply scalar single-precision floating-point value from xmm0 and xmm1, add to xmm2/mem and put result in xmm0. + + + VFMADD231SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 B9 /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2/mem, add to xmm0 and put result in xmm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADDSUB132PD/VFMADDSUB213PD/VFMADDSUB231PD--Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values. 
+ + VFMADDSUB132PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 96 /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm2/mem, add/subtract elements in xmm1 and put result in xmm0. + + + VFMADDSUB213PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 A6 /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm1, add/subtract elements in xmm2/mem and put result in xmm0. + + + VFMADDSUB231PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 B6 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2/mem, add/subtract elements in xmm0 and put result in xmm0. + + + VFMADDSUB132PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 96 /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm2/mem, add/subtract elements in ymm1 and put result in ymm0. + + + VFMADDSUB213PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 A6 /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm1, add/subtract elements in ymm2/mem and put result in ymm0. + + + VFMADDSUB231PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 B6 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2/mem, add/subtract elements in ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMADDSUB132PS/VFMADDSUB213PS/VFMADDSUB231PS--Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values. + + VFMADDSUB132PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 96 /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm2/mem, add/subtract elements in xmm1 and put result in xmm0. + + + VFMADDSUB213PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 A6 /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm1, add/subtract elements in xmm2/mem and put result in xmm0. + + + VFMADDSUB231PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 B6 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2/mem, add/subtract elements in xmm0 and put result in xmm0. + + + VFMADDSUB132PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 96 /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm2/mem, add/subtract elements in ymm1 and put result in ymm0. + + + VFMADDSUB213PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 A6 /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm1, add/subtract elements in ymm2/mem and put result in ymm0. + + + VFMADDSUB231PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 B6 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2/mem, add/subtract elements in ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUBADD132PD/VFMSUBADD213PD/VFMSUBADD231PD--Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values. + + VFMSUBADD132PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 97 /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm2/mem, subtract/add elements in xmm1 and put result in xmm0. + + + VFMSUBADD213PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 A7 /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm1, subtract/add elements in xmm2/mem and put result in xmm0. 
+ + + VFMSUBADD231PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 B7 /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2/mem, subtract/add elements in xmm0 and put result in xmm0. + + + VFMSUBADD132PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 97 /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm2/mem, subtract/add elements in ymm1 and put result in ymm0. + + + VFMSUBADD213PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 A7 /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm1, subtract/add elements in ymm2/mem and put result in ymm0. + + + VFMSUBADD231PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 B7 /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2/mem, subtract/add elements in ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUBADD132PS/VFMSUBADD213PS/VFMSUBADD231PS--Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values. + + VFMSUBADD132PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 97 /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm2/mem, subtract/add elements in xmm1 and put result in xmm0. + + + VFMSUBADD213PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 A7 /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm1, subtract/add elements in xmm2/mem and put result in xmm0. + + + VFMSUBADD231PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 B7 /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2/mem, subtract/add elements in xmm0 and put result in xmm0. + + + VFMSUBADD132PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 97 /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm2/mem, subtract/add elements in ymm1 and put result in ymm0. + + + VFMSUBADD213PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 A7 /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm1, subtract/add elements in ymm2/mem and put result in ymm0. + + + VFMSUBADD231PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 B7 /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2/mem, subtract/add elements in ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132PD/VFMSUB213PD/VFMSUB231PD--Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values. + + VFMSUB132PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 9A /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm2/mem, subtract xmm1 and put result in xmm0. + + + VFMSUB213PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 AA /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm1, subtract xmm2/mem and put result in xmm0. + + + VFMSUB231PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 BA /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2/mem, subtract xmm0 and put result in xmm0. + + + VFMSUB132PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 9A /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm2/mem, subtract ymm1 and put result in ymm0. 
+ + + VFMSUB213PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 AA /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm1, subtract ymm2/mem and put result in ymm0. + + + VFMSUB231PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 BA /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2/mem, subtract ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132PS/VFMSUB213PS/VFMSUB231PS--Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values. + + VFMSUB132PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 9A /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm2/mem, subtract xmm1 and put result in xmm0. + + + VFMSUB213PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 AA /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm1, subtract xmm2/mem and put result in xmm0. + + + VFMSUB231PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 BA /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2/mem, subtract xmm0 and put result in xmm0. + + + VFMSUB132PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 9A /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm2/mem, subtract ymm1 and put result in ymm0. + + + VFMSUB213PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 AA /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm1, subtract ymm2/mem and put result in ymm0. + + + VFMSUB231PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 BA /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2/mem, subtract ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132SD/VFMSUB213SD/VFMSUB231SD--Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values. + + VFMSUB132SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 9B /r + + FMA + + Multiply scalar double-precision floating-point value from xmm0 and xmm2/mem, subtract xmm1 and put result in xmm0. + + + VFMSUB213SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 AB /r + + FMA + + Multiply scalar double-precision floating-point value from xmm0 and xmm1, subtract xmm2/mem and put result in xmm0. + + + VFMSUB231SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 BB /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2/mem, subtract xmm0 and put result in xmm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFMSUB132SS/VFMSUB213SS/VFMSUB231SS--Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values. + + VFMSUB132SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 9B /r + + FMA + + Multiply scalar single-precision floating-point value from xmm0 and xmm2/mem, subtract xmm1 and put result in xmm0. + + + VFMSUB213SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 AB /r + + FMA + + Multiply scalar single-precision floating-point value from xmm0 and xmm1, subtract xmm2/mem and put result in xmm0. + + + VFMSUB231SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 BB /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2/mem, subtract xmm0 and put result in xmm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132PD/VFNMADD213PD/VFNMADD231PD--Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values. 
+ + VFNMADD132PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 9C /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm2/mem, negate the multiplication result and add to xmm1 and put result in xmm0. + + + VFNMADD213PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 AC /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm1, negate the multiplication result and add to xmm2/mem and put result in xmm0. + + + VFNMADD231PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 BC /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2/mem, negate the multiplication result and add to xmm0 and put result in xmm0. + + + VFNMADD132PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 9C /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm2/mem, negate the multiplication result and add to ymm1 and put result in ymm0. + + + VFNMADD213PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 AC /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm1, negate the multiplication result and add to ymm2/mem and put result in ymm0. + + + VFNMADD231PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 BC /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2/mem, negate the multiplication result and add to ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132PS/VFNMADD213PS/VFNMADD231PS--Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values. + + VFNMADD132PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 9C /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm2/mem, negate the multiplication result and add to xmm1 and put result in xmm0. + + + VFNMADD213PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 AC /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm1, negate the multiplication result and add to xmm2/mem and put result in xmm0. + + + VFNMADD231PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 BC /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2/mem, negate the multiplication result and add to xmm0 and put result in xmm0. + + + VFNMADD132PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 9C /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm2/mem, negate the multiplication result and add to ymm1 and put result in ymm0. + + + VFNMADD213PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 AC /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm1, negate the multiplication result and add to ymm2/mem and put result in ymm0. + + + VFNMADD231PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 BC /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2/mem, negate the multiplication result and add to ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132SD/VFNMADD213SD/VFNMADD231SD--Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values. + + VFNMADD132SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 9D /r + + FMA + + Multiply scalar double-precision floating-point value from xmm0 and xmm2/mem, negate the multiplication result and add to xmm1 and put result in xmm0. 
+ + + VFNMADD213SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 AD /r + + FMA + + Multiply scalar double-precision floating-point value from xmm0 and xmm1, negate the multiplication result and add to xmm2/mem and put result in xmm0. + + + VFNMADD231SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 BD /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2/mem, negate the multiplication result and add to xmm0 and put result in xmm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMADD132SS/VFNMADD213SS/VFNMADD231SS--Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values. + + VFNMADD132SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 9D /r + + FMA + + Multiply scalar single-precision floating-point value from xmm0 and xmm2/mem, negate the multiplication result and add to xmm1 and put result in xmm0. + + + VFNMADD213SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 AD /r + + FMA + + Multiply scalar single-precision floating-point value from xmm0 and xmm1, negate the multiplication result and add to xmm2/mem and put result in xmm0. + + + VFNMADD231SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 BD /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2/mem, negate the multiplication result and add to xmm0 and put result in xmm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132PD/VFNMSUB213PD/VFNMSUB231PD--Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values. + + VFNMSUB132PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 9E /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm2/mem, negate the multiplication result and subtract xmm1 and put result in xmm0. + + + VFNMSUB213PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 AE /r + + FMA + + Multiply packed double-precision floating-point values from xmm0 and xmm1, negate the multiplication result and subtract xmm2/mem and put result in xmm0. + + + VFNMSUB231PD + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W1 BE /r + + FMA + + Multiply packed double-precision floating-point values from xmm1 and xmm2/mem, negate the multiplication result and subtract xmm0 and put result in xmm0. + + + VFNMSUB132PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 9E /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm2/mem, negate the multiplication result and subtract ymm1 and put result in ymm0. + + + VFNMSUB213PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 AE /r + + FMA + + Multiply packed double-precision floating-point values from ymm0 and ymm1, negate the multiplication result and subtract ymm2/mem and put result in ymm0. + + + VFNMSUB231PD + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W1 BE /r + + FMA + + Multiply packed double-precision floating-point values from ymm1 and ymm2/mem, negate the multiplication result and subtract ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132PS/VFNMSUB213PS/VFNMSUB231PS--Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values. + + VFNMSUB132PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 9E /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm2/mem, negate the multiplication result and subtract xmm1 and put result in xmm0. 
+ + + VFNMSUB213PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 AE /r + + FMA + + Multiply packed single-precision floating-point values from xmm0 and xmm1, negate the multiplication result and subtract xmm2/mem and put result in xmm0. + + + VFNMSUB231PS + xmm0,xmm1,xmm2/m128 + VEX.DDS.128.66.0F38.W0 BE /r + + FMA + + Multiply packed single-precision floating-point values from xmm1 and xmm2/mem, negate the multiplication result and subtract xmm0 and put result in xmm0. + + + VFNMSUB132PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 9E /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm2/mem, negate the multiplication result and subtract ymm1 and put result in ymm0. + + + VFNMSUB213PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 AE /r + + FMA + + Multiply packed single-precision floating-point values from ymm0 and ymm1, negate the multiplication result and subtract ymm2/mem and put result in ymm0. + + + VFNMSUB231PS + ymm0,ymm1,ymm2/m256 + VEX.DDS.256.66.0F38.W0 BE /r + + FMA + + Multiply packed single-precision floating-point values from ymm1 and ymm2/mem, negate the multiplication result and subtract ymm0 and put result in ymm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132SD/VFNMSUB213SD/VFNMSUB231SD--Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values. + + VFNMSUB132SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 9F /r + + FMA + + Multiply scalar double-precision floating-point value from xmm0 and xmm2/mem, negate the multiplication result and subtract xmm1 and put result in xmm0. + + + VFNMSUB213SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 AF /r + + FMA + + Multiply scalar double-precision floating-point value from xmm0 and xmm1, negate the multiplication result and subtract xmm2/mem and put result in xmm0. + + + VFNMSUB231SD + xmm0,xmm1,xmm2/m64 + VEX.DDS.LIG.128.66.0F38.W1 BF /r + + FMA + + Multiply scalar double-precision floating-point value from xmm1 and xmm2/mem, negate the multiplication result and subtract xmm0 and put result in xmm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VFNMSUB132SS/VFNMSUB213SS/VFNMSUB231SS--Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values. + + VFNMSUB132SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 9F /r + + FMA + + Multiply scalar single-precision floating-point value from xmm0 and xmm2/mem, negate the multiplication result and subtract xmm1 and put result in xmm0. + + + VFNMSUB213SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 AF /r + + FMA + + Multiply scalar single-precision floating-point value from xmm0 and xmm1, negate the multiplication result and subtract xmm2/mem and put result in xmm0. + + + VFNMSUB231SS + xmm0,xmm1,xmm2/m32 + VEX.DDS.LIG.128.66.0F38.W0 BF /r + + FMA + + Multiply scalar single-precision floating-point value from xmm1 and xmm2/mem, negate the multiplication result and subtract xmm0 and put result in xmm0. + + + ModRM:reg(r,w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VGATHERDPD/VGATHERQPD--Gather Packed DP FP Values Using Signed Dword/Qword Indices. + + VGATHERDPD + xmm1,vm32x,xmm2 + VEX.DDS.128.66.0F38.W1 92 /r + + AVX2 + + Using dword indices specified in vm32x, gather double-precision FP values from memory conditioned on mask specified by xmm2. Conditionally gathered elements are merged into xmm1. 
+ + + VGATHERQPD + xmm1,vm64x,xmm2 + VEX.DDS.128.66.0F38.W1 93 /r + + AVX2 + + Using qword indices specified in vm64x, gather double-precision FP values from memory conditioned on mask specified by xmm2. Conditionally gathered elements are merged into xmm1. + + + VGATHERDPD + ymm1,vm32x,ymm2 + VEX.DDS.256.66.0F38.W1 92 /r + + AVX2 + + Using dword indices specified in vm32x, gather double-precision FP values from memory conditioned on mask specified by ymm2. Conditionally gathered elements are merged into ymm1. + + + VGATHERQPD + ymm1,vm64y,ymm2 + VEX.DDS.256.66.0F38.W1 93 /r + + AVX2 + + Using qword indices specified in vm64y, gather double-precision FP values from memory conditioned on mask specified by ymm2. Conditionally gathered elements are merged into ymm1. + + + ModRM:reg(r,w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + VEX.vvvv(r,w) + NA + + + + VGATHERDPS/VGATHERQPS--Gather Packed SP FP values Using Signed Dword/Qword Indices. + + VGATHERDPS + xmm1,vm32x,xmm2 + VEX.DDS.128.66.0F38.W0 92 /r + + AVX2 + + Using dword indices specified in vm32x, gather single-precision FP values from memory conditioned on mask specified by xmm2. Conditionally gathered elements are merged into xmm1. + + + VGATHERQPS + xmm1,vm64x,xmm2 + VEX.DDS.128.66.0F38.W0 93 /r + + AVX2 + + Using qword indices specified in vm64x, gather single-precision FP values from memory conditioned on mask specified by xmm2. Conditionally gathered elements are merged into xmm1. + + + VGATHERDPS + ymm1,vm32y,ymm2 + VEX.DDS.256.66.0F38.W0 92 /r + + AVX2 + + Using dword indices specified in vm32y, gather single-precision FP values from memory conditioned on mask specified by ymm2. Conditionally gathered elements are merged into ymm1. + + + VGATHERQPS + xmm1,vm64y,xmm2 + VEX.DDS.256.66.0F38.W0 93 /r + + AVX2 + + Using qword indices specified in vm64y, gather single-precision FP values from memory conditioned on mask specified by xmm2. Conditionally gathered elements are merged into xmm1. + + + ModRM:reg(r,w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + VEX.vvvv(r,w) + NA + + + + VPGATHERDD/VPGATHERQD--Gather Packed Dword Values Using Signed Dword/Qword Indices. + + VPGATHERDD + xmm1,vm32x,xmm2 + VEX.DDS.128.66.0F38.W0 90 /r + + AVX2 + + Using dword indices specified in vm32x, gather dword values from memory conditioned on mask specified by xmm2. Conditionally gathered elements are merged into xmm1. + + + VPGATHERQD + xmm1,vm64x,xmm2 + VEX.DDS.128.66.0F38.W0 91 /r + + AVX2 + + Using qword indices specified in vm64x, gather dword values from memory conditioned on mask specified by xmm2. Conditionally gathered elements are merged into xmm1. + + + VPGATHERDD + ymm1,vm32y,ymm2 + VEX.DDS.256.66.0F38.W0 90 /r + + AVX2 + + Using dword indices specified in vm32y, gather dword from memory conditioned on mask specified by ymm2. Conditionally gathered elements are merged into ymm1. + + + VPGATHERQD + xmm1,vm64y,xmm2 + VEX.DDS.256.66.0F38.W0 91 /r + + AVX2 + + Using qword indices specified in vm64y, gather dword values from memory conditioned on mask specified by xmm2. Conditionally gathered elements are merged into xmm1. + + + ModRM:reg(r,w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + VEX.vvvv(r,w) + NA + + + + VPGATHERDQ/VPGATHERQQ--Gather Packed Qword Values Using Signed Dword/Qword Indices. + + VPGATHERDQ + xmm1,vm32x,xmm2 + VEX.DDS.128.66.0F38.W1 90 /r + + AVX2 + + Using dword indices specified in vm32x, gather qword values from memory conditioned on mask specified by xmm2. 
Conditionally gathered elements are merged into xmm1. + + + VPGATHERQQ + xmm1,vm64x,xmm2 + VEX.DDS.128.66.0F38.W1 91 /r + + AVX2 + + Using qword indices specified in vm64x, gather qword values from memory conditioned on mask specified by xmm2. Conditionally gathered elements are merged into xmm1. + + + VPGATHERDQ + ymm1,vm32x,ymm2 + VEX.DDS.256.66.0F38.W1 90 /r + + AVX2 + + Using dword indices specified in vm32x, gather qword values from memory conditioned on mask specified by ymm2. Conditionally gathered elements are merged into ymm1. + + + VPGATHERQQ + ymm1,vm64y,ymm2 + VEX.DDS.256.66.0F38.W1 91 /r + + AVX2 + + Using qword indices specified in vm64y, gather qword values from memory conditioned on mask specified by ymm2. Conditionally gathered elements are merged into ymm1. + + + ModRM:reg(r,w) + BaseReg(R): VSIB:base,VectorReg(R): VSIB:index + VEX.vvvv(r,w) + NA + + + + VINSERTF128--Insert Packed Floating-Point Values. + + VINSERTF128 + ymm1,ymm2,xmm3/m128,imm8 + VEX.NDS.256.66.0F3A.W0 18 /r ib + + AVX + + Insert a single precision floating-point value selected by imm8 from xmm3/m128 into ymm2 at the specified destination element specified by imm8 and zero out destination elements in ymm1 as indicated in imm8. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + VINSERTI128--Insert Packed Integer Values. + + VINSERTI128 + ymm1,ymm2,xmm3/m128,imm8 + VEX.NDS.256.66.0F3A.W0 38 /r ib + + AVX2 + + Insert 128-bits of integer data from xmm3/mem and the remaining values from ymm2 into ymm1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VMASKMOV--Conditional SIMD Packed Loads and Stores. + + VMASKMOVPS + xmm1,xmm2,m128 + VEX.NDS.128.66.0F38.W0 2C /r + + AVX + + Conditionally load packed single-precision values from m128 using mask in xmm2 and store in xmm1. + + + VMASKMOVPS + ymm1,ymm2,m256 + VEX.NDS.256.66.0F38.W0 2C /r + + AVX + + Conditionally load packed single-precision values from m256 using mask in ymm2 and store in ymm1. + + + VMASKMOVPD + xmm1,xmm2,m128 + VEX.NDS.128.66.0F38.W0 2D /r + + AVX + + Conditionally load packed double-precision values from m128 using mask in xmm2 and store in xmm1. + + + VMASKMOVPD + ymm1,ymm2,m256 + VEX.NDS.256.66.0F38.W0 2D /r + + AVX + + Conditionally load packed double-precision values from m256 using mask in ymm2 and store in ymm1. + + + VMASKMOVPS + m128,xmm1,xmm2 + VEX.NDS.128.66.0F38.W0 2E /r + + AVX + + Conditionally store packed single-precision values from xmm2 using mask in xmm1. + + + VMASKMOVPS + m256,ymm1,ymm2 + VEX.NDS.256.66.0F38.W0 2E /r + + AVX + + Conditionally store packed single-precision values from ymm2 using mask in ymm1. + + + VMASKMOVPD + m128,xmm1,xmm2 + VEX.NDS.128.66.0F38.W0 2F /r + + AVX + + Conditionally store packed double-precision values from xmm2 using mask in xmm1. + + + VMASKMOVPD + m256,ymm1,ymm2 + VEX.NDS.256.66.0F38.W0 2F /r + + AVX + + Conditionally store packed double-precision values from ymm2 using mask in ymm1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + VEX.vvvv(r) + ModRM:reg(r) + NA + + + + VPBLENDD--Blend Packed Dwords. + + VPBLENDD + xmm1,xmm2,xmm3/m128,imm8 + VEX.NDS.128.66.0F3A.W0 02 /r ib + + AVX2 + + Select dwords from xmm2 and xmm3/m128 from mask specified in imm8 and store the values into xmm1. + + + VPBLENDD + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A.W0 02 /r ib + + AVX2 + + Select dwords from ymm2 and ymm3/m256 from mask specified in imm8 and store the values into ymm1. 
+ + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VPBROADCAST--Broadcast Integer Data. + + VPBROADCASTB + xmm1,xmm2/m8 + VEX.128.66.0F38.W0 78 /r + + AVX2 + + Broadcast a byte integer in the source operand to sixteen locations in xmm1. + + + VPBROADCASTB + ymm1,xmm2/m8 + VEX.256.66.0F38.W0 78 /r + + AVX2 + + Broadcast a byte integer in the source operand to thirtytwo locations in ymm1. + + + VPBROADCASTW + xmm1,xmm2/m16 + VEX.128.66.0F38.W0 79 /r + + AVX2 + + Broadcast a word integer in the source operand to eight locations in xmm1. + + + VPBROADCASTW + ymm1,xmm2/m16 + VEX.256.66.0F38.W0 79 /r + + AVX2 + + Broadcast a word integer in the source operand to sixteen locations in ymm1. + + + VPBROADCASTD + xmm1,xmm2/m32 + VEX.128.66.0F38.W0 58 /r + + AVX2 + + Broadcast a dword integer in the source operand to four locations in xmm1. + + + VPBROADCASTD + ymm1,xmm2/m32 + VEX.256.66.0F38.W0 58 /r + + AVX2 + + Broadcast a dword integer in the source operand to eight locations in ymm1. + + + VPBROADCASTQ + xmm1,xmm2/m64 + VEX.128.66.0F38.W0 59 /r + + AVX2 + + Broadcast a qword element in mem to two locations in xmm1. + + + VPBROADCASTQ + ymm1,xmm2/m64 + VEX.256.66.0F38.W0 59 /r + + AVX2 + + Broadcast a qword element in mem to four locations in ymm1. + + + VBROADCASTI128 + ymm1,m128 + VEX.256.66.0F38.W0 5A /r + + AVX2 + + Broadcast 128 bits of integer data in mem to low and high 128-bits in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + VPERMD--Full Doublewords Element Permutation. + + VPERMD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 36 /r + + AVX2 + + Permute doublewords in ymm3/m256 using indexes in ymm2 and store the result in ymm1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VPERMPD--Permute Double-Precision Floating-Point Elements. + + VPERMPD + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W1 01 /r ib + + AVX2 + + Permute double-precision floating-point elements in ymm2/m256 using indexes in imm8 and store the result in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VPERMPS--Permute Single-Precision Floating-Point Elements. + + VPERMPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 16 /r + + AVX2 + + Permute single-precision floating-point elements in ymm3/m256 using indexes in ymm2 and store the result in ymm1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VPERMQ--Qwords Element Permutation. + + VPERMQ + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W1 00 /r ib + + AVX2 + + Permute qwords in ymm2/m256 using indexes in imm8 and store the result in ymm1. + + + ModRM:reg(w) + ModRM:r/m(r) + Imm8 + NA + + + + VPERM2I128--Permute Integer Values. + + VPERM2I128 + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A.W0 46 /r ib + + AVX2 + + Permute 128-bit integer data in ymm2 and ymm3/mem using controls from imm8 and store result in ymm1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + Imm8 + + + + VPERMILPD--Permute Double-Precision Floating-Point Values. + + VPERMILPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 0D /r + + AVX + + Permute double-precision floating-point values in xmm2 using controls from xmm3/mem and store result in xmm1. + + + VPERMILPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 0D /r + + AVX + + Permute double-precision floating-point values in ymm2 using controls from ymm3/mem and store result in ymm1. + + + VPERMILPD + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.W0 05 /r ib + + AVX + + Permute double-precision floating-point values in xmm2/mem using controls from imm8. 
+ + + VPERMILPD + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W0 05 /r ib + + AVX + + Permute double-precision floating-point values in ymm2/mem using controls from imm8. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + + VPERMILPS--Permute Single-Precision Floating-Point Values. + + VPERMILPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 0C /r + + AVX + + Permute single-precision floating-point values in xmm2 using controls from xmm3/mem and store result in xmm1. + + + VPERMILPS + xmm1,xmm2/m128,imm8 + VEX.128.66.0F3A.W0 04 /r ib + + AVX + + Permute single-precision floating-point values in xmm2/mem using controls from imm8 and store result in xmm1. + + + VPERMILPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 0C /r + + AVX + + Permute single-precision floating-point values in ymm2 using controls from ymm3/mem and store result in ymm1. + + + VPERMILPS + ymm1,ymm2/m256,imm8 + VEX.256.66.0F3A.W0 04 /r ib + + AVX + + Permute single-precision floating-point values in ymm2/mem using controls from imm8 and store result in ymm1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + ModRM:reg(w) + ModRM:r/m(r) + imm8(r) + NA + + + + VPERM2F128--Permute Floating-Point Values. + + VPERM2F128 + ymm1,ymm2,ymm3/m256,imm8 + VEX.NDS.256.66.0F3A.W0 06 /r ib + + AVX + + Permute 128-bit floating-point fields in ymm2 and ymm3/mem using controls from imm8 and store result in ymm1. + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + imm8(r) + + + + VPMASKMOV--Conditional SIMD Integer Packed Loads and Stores. + + VPMASKMOVD + xmm1,xmm2,m128 + VEX.NDS.128.66.0F38.W0 8C /r + + AVX2 + + Conditionally load dword values from m128 using mask in xmm2 and store in xmm1. + + + VPMASKMOVD + ymm1,ymm2,m256 + VEX.NDS.256.66.0F38.W0 8C /r + + AVX2 + + Conditionally load dword values from m256 using mask in ymm2 and store in ymm1. + + + VPMASKMOVQ + xmm1,xmm2,m128 + VEX.NDS.128.66.0F38.W1 8C /r + + AVX2 + + Conditionally load qword values from m128 using mask in xmm2 and store in xmm1. + + + VPMASKMOVQ + ymm1,ymm2,m256 + VEX.NDS.256.66.0F38.W1 8C /r + + AVX2 + + Conditionally load qword values from m256 using mask in ymm2 and store in ymm1. + + + VPMASKMOVD + m128,xmm1,xmm2 + VEX.NDS.128.66.0F38.W0 8E /r + + AVX2 + + Conditionally store dword values from xmm2 using mask in xmm1. + + + VPMASKMOVD + m256,ymm1,ymm2 + VEX.NDS.256.66.0F38.W0 8E /r + + AVX2 + + Conditionally store dword values from ymm2 using mask in ymm1. + + + VPMASKMOVQ + m128,xmm1,xmm2 + VEX.NDS.128.66.0F38.W1 8E /r + + AVX2 + + Conditionally store qword values from xmm2 using mask in xmm1. + + + VPMASKMOVQ + m256,ymm1,ymm2 + VEX.NDS.256.66.0F38.W1 8E /r + + AVX2 + + Conditionally store qword values from ymm2 using mask in ymm1. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + ModRM:r/m(w) + VEX.vvvv + ModRM:reg(r) + NA + + + + VPSLLVD/VPSLLVQ--Variable Bit Shift Left Logical. + + VPSLLVD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 47 /r + + AVX2 + + Shift bits in doublewords in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSLLVQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 47 /r + + AVX2 + + Shift bits in quadwords in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSLLVD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 47 /r + + AVX2 + + Shift bits in doublewords in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. 
+ + + VPSLLVQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 47 /r + + AVX2 + + Shift bits in quadwords in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VPSRAVD--Variable Bit Shift Right Arithmetic. + + VPSRAVD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 46 /r + + AVX2 + + Shift bits in doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in the sign bits. + + + VPSRAVD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 46 /r + + AVX2 + + Shift bits in doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in the sign bits. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VPSRLVD/VPSRLVQ--Variable Bit Shift Right Logical. + + VPSRLVD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W0 45 /r + + AVX2 + + Shift bits in doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSRLVQ + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F38.W1 45 /r + + AVX2 + + Shift bits in quadwords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s. + + + VPSRLVD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W0 45 /r + + AVX2 + + Shift bits in doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + VPSRLVQ + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F38.W1 45 /r + + AVX2 + + Shift bits in quadwords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s. + + + ModRM:reg(w) + VEX.vvvv + ModRM:r/m(r) + NA + + + + VTESTPD/VTESTPS--Packed Bit Test. + + VTESTPS + xmm1,xmm2/m128 + VEX.128.66.0F38.W0 0E /r + + AVX + + Set ZF and CF depending on sign bit AND and ANDN of packed single-precision floating-point sources. + + + VTESTPS + ymm1,ymm2/m256 + VEX.256.66.0F38.W0 0E /r + + AVX + + Set ZF and CF depending on sign bit AND and ANDN of packed single-precision floating-point sources. + + + VTESTPD + xmm1,xmm2/m128 + VEX.128.66.0F38.W0 0F /r + + AVX + + Set ZF and CF depending on sign bit AND and ANDN of packed double-precision floating-point sources. + + + VTESTPD + ymm1,ymm2/m256 + VEX.256.66.0F38.W0 0F /r + + AVX + + Set ZF and CF depending on sign bit AND and ANDN of packed double-precision floating-point sources. + + + ModRM:reg(r) + ModRM:r/m(r) + NA + NA + + + + VZEROALL--Zero All YMM Registers. + + VZEROALL + void + VEX.256.0F.WIG 77 + + AVX + + Zero all YMM registers. + + + NA + NA + NA + NA + + + + VZEROUPPER--Zero Upper Bits of YMM Registers. + + VZEROUPPER + void + VEX.128.0F.WIG 77 + + AVX + + Zero upper 128 bits of all YMM registers. + + + NA + NA + NA + NA + + + + WAIT/FWAIT--Wait. + + WAIT + void + 9B + Check pending unmasked floating-point exceptions. + + + FWAIT + void + 9B + Check pending unmasked floating-point exceptions. + + + NA + NA + NA + NA + + + + WBINVD--Write Back and Invalidate Cache. + + WBINVD + void + 0F 09 + Write back and flush Internal caches; initiate writing-back and flushing of external caches. + + + NA + NA + NA + NA + + + + WRFSBASE/WRGSBASE--Write FS/GS Segment Base. + + WRFSBASE + r32 + F3 0F AE /2 + + FSGSBASE + + Load the FS base address with the 32-bit value in the source register. + + + WRFSBASE + r64 + F3 REX.W 0F AE /2 + + FSGSBASE + + Load the FS base address with the 64-bit value in the source register. 
+ + + WRGSBASE + r32 + F3 0F AE /3 + + FSGSBASE + + Load the GS base address with the 32-bit value in the source register. + + + WRGSBASE + r64 + F3 REX.W 0F AE /3 + + FSGSBASE + + Load the GS base address with the 64-bit value in the source register. + + + ModRM:r/m(r) + NA + NA + NA + + + + WRMSR--Write to Model Specific Register. + + WRMSR + void + 0F 30 + Write the value in EDX:EAX to MSR specified by ECX. + + + NA + NA + NA + NA + + + + WRPKRU--Write Data to User Page Key Register. + + WRPKRU + void + 0F 01 EF + + OSPKE + + Writes EAX into PKRU. + + + NA + NA + NA + NA + + + + XACQUIRE/XRELEASE--Hardware Lock Elision Prefix Hints. + + XACQUIRE + void + F2 + + HLE1 + + A hint used with an 'XACQUIRE-enabled' instruction to start lock elision on the instruction memory operand address. + + + XRELEASE + void + F3 + + HLE + + A hint used with an 'XRELEASE-enabled' instruction to end lock elision on the instruction memory operand address. + + + + XABORT--Transactional Abort. + + XABORT + imm8 + C6 F8 ib + + RTM + + Causes an RTM abort if in RTM execution. + + + imm8(r) + NA + NA + NA + + + + XADD--Exchange and Add. + + XADD + r/m8,r8 + 0F C0 /r + Exchange r8 and r/m8; load sum into r/m8. + + + XADD + r/m8*,r8* + REX + 0F C0 /r + Exchange r8 and r/m8; load sum into r/m8. + + + XADD + r/m16,r16 + 0F C1 /r + Exchange r16 and r/m16; load sum into r/m16. + + + XADD + r/m32,r32 + 0F C1 /r + Exchange r32 and r/m32; load sum into r/m32. + + + XADD + r/m64,r64 + REX.W + 0F C1 /r + Exchange r64 and r/m64; load sum into r/m64. + + + ModRM:r/m(r,w) + ModRM:reg(W) + NA + NA + + + + XBEGIN--Transactional Begin. + + XBEGIN + rel16 + C7 F8 + + RTM + + Specifies the start of an RTM region. Provides a 16-bit relative offset to compute the address of the fallback instruction address at which execution resumes following an RTM abort. + + + XBEGIN + rel32 + C7 F8 + + RTM + + Specifies the start of an RTM region. Provides a 32-bit relative offset to compute the address of the fallback instruction address at which execution resumes following an RTM abort. + + + Offset + NA + NA + NA + + + + XCHG--Exchange Register/Memory with Register. + + XCHG + AX,r16 + 90+rw + Exchange r16 with AX. + + + XCHG + r16,AX + 90+rw + Exchange AX with r16. + + + XCHG + EAX,r32 + 90+rd + Exchange r32 with EAX. + + + XCHG + RAX,r64 + REX.W + 90+rd + Exchange r64 with RAX. + + + XCHG + r32,EAX + 90+rd + Exchange EAX with r32. + + + XCHG + r64,RAX + REX.W + 90+rd + Exchange RAX with r64. + + + XCHG + r/m8,r8 + 86 /r + Exchange r8 (byte register) with byte from r/m8. + + + XCHG + r/m8*,r8* + REX + 86 /r + Exchange r8 (byte register) with byte from r/m8. + + + XCHG + r8,r/m8 + 86 /r + Exchange byte from r/m8 with r8 (byte register). + + + XCHG + r8*,r/m8* + REX + 86 /r + Exchange byte from r/m8 with r8 (byte register). + + + XCHG + r/m16,r16 + 87 /r + Exchange r16 with word from r/m16. + + + XCHG + r16,r/m16 + 87 /r + Exchange word from r/m16 with r16. + + + XCHG + r/m32,r32 + 87 /r + Exchange r32 with doubleword from r/m32. + + + XCHG + r/m64,r64 + REX.W + 87 /r + Exchange r64 with quadword from r/m64. + + + XCHG + r32,r/m32 + 87 /r + Exchange doubleword from r/m32 with r32. + + + XCHG + r64,r/m64 + REX.W + 87 /r + Exchange quadword from r/m64 with r64. + + + AX/EAX/RAX(r,w) + opcode + rd(r,w) + NA + NA + + + opcode + rd(r,w) + AX/EAX/RAX(r,w) + NA + NA + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(w) + ModRM:r/m(r) + NA + NA + + + + XEND--Transactional End. 
+ + XEND + void + 0F 01 D5 + + RTM + + Specifies the end of an RTM code region. + + + NA + NA + NA + NA + + + + XGETBV--Get Value of Extended Control Register. + + XGETBV + void + 0F 01 D0 + Reads an XCR specified by ECX into EDX:EAX. + + + NA + NA + NA + NA + + + + XLAT/XLATB--Table Look-up Translation. + + XLAT + m8 + D7 + Set AL to memory byte DS:[(E)BX + unsigned AL]. + + + XLATB + void + D7 + Set AL to memory byte DS:[(E)BX + unsigned AL]. + + + XLATB + void + REX.W + D7 + Set AL to memory byte [RBX + unsigned AL]. + + + NA + NA + NA + NA + + + + XOR--Logical Exclusive OR. + + XOR + AL,imm8 + 34 ib + AL XOR imm8. + + + XOR + AX,imm16 + 35 iw + AX XOR imm16. + + + XOR + EAX,imm32 + 35 id + EAX XOR imm32. + + + XOR + RAX,imm32 + REX.W + 35 id + RAX XOR imm32 (sign-extended). + + + XOR + r/m8,imm8 + 80 /6 ib + r/m8 XOR imm8. + + + XOR + r/m8*,imm8 + REX + 80 /6 ib + r/m8 XOR imm8. + + + XOR + r/m16,imm16 + 81 /6 iw + r/m16 XOR imm16. + + + XOR + r/m32,imm32 + 81 /6 id + r/m32 XOR imm32. + + + XOR + r/m64,imm32 + REX.W + 81 /6 id + r/m64 XOR imm32 (sign-extended). + + + XOR + r/m16,imm8 + 83 /6 ib + r/m16 XOR imm8 (sign-extended). + + + XOR + r/m32,imm8 + 83 /6 ib + r/m32 XOR imm8 (sign-extended). + + + XOR + r/m64,imm8 + REX.W + 83 /6 ib + r/m64 XOR imm8 (sign-extended). + + + XOR + r/m8,r8 + 30 /r + r/m8 XOR r8. + + + XOR + r/m8*,r8* + REX + 30 /r + r/m8 XOR r8. + + + XOR + r/m16,r16 + 31 /r + r/m16 XOR r16. + + + XOR + r/m32,r32 + 31 /r + r/m32 XOR r32. + + + XOR + r/m64,r64 + REX.W + 31 /r + r/m64 XOR r64. + + + XOR + r8,r/m8 + 32 /r + r8 XOR r/m8. + + + XOR + r8*,r/m8* + REX + 32 /r + r8 XOR r/m8. + + + XOR + r16,r/m16 + 33 /r + r16 XOR r/m16. + + + XOR + r32,r/m32 + 33 /r + r32 XOR r/m32. + + + XOR + r64,r/m64 + REX.W + 33 /r + r64 XOR r/m64. + + + AL/AX/EAX/RAX + imm8(r)/16/32 + NA + NA + + + ModRM:r/m(r,w) + imm8(r)/16/32 + NA + NA + + + ModRM:r/m(r,w) + ModRM:reg(r) + NA + NA + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + + XORPD--Bitwise Logical XOR for Double-Precision Floating-Point Values. + + XORPD + xmm1,xmm2/m128 + 66 0F 57 /r + + SSE2 + + Bitwise exclusive-OR of xmm2/m128 and xmm1. + + + VXORPD + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.66.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed double-precision floating-point values in xmm2 and xmm3/mem. + + + VXORPD + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.66.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed double-precision floating-point values in ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + XORPS--Bitwise Logical XOR for Single-Precision Floating-Point Values. + + XORPS + xmm1,xmm2/m128 + 0F 57 /r + + SSE + + Bitwise exclusive-OR of xmm2/m128 and xmm1. + + + VXORPS + xmm1,xmm2,xmm3/m128 + VEX.NDS.128.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed singleprecision floating-point values in xmm2 and xmm3/mem. + + + VXORPS + ymm1,ymm2,ymm3/m256 + VEX.NDS.256.0F.WIG 57 /r + + AVX + + Return the bitwise logical XOR of packed singleprecision floating-point values in ymm2 and ymm3/mem. + + + ModRM:reg(r,w) + ModRM:r/m(r) + NA + NA + + + ModRM:reg(w) + VEX.vvvv(r) + ModRM:r/m(r) + NA + + + + XRSTOR--Restore Processor Extended States. + + XRSTOR + mem + 0F AE /5 + Restore state components specified by EDX:EAX from mem. + + + XRSTOR64 + mem + REX.W+ 0F AE /5 + Restore state components specified by EDX:EAX from mem. + + + ModRM:r/m(r) + NA + NA + NA + + + + XRSTORS--Restore Processor Extended States Supervisor. 
+ + XRSTORS + mem + 0F C7 /3 + Restore state components specified by EDX:EAX from mem. + + + XRSTORS64 + mem + REX.W+ 0F C7 /3 + Restore state components specified by EDX:EAX from mem. + + + ModRM:r/m(r) + NA + NA + NA + + + + XSAVE--Save Processor Extended States. + + XSAVE + mem + 0F AE /4 + Save state components specified by EDX:EAX to mem. + + + XSAVE64 + mem + REX.W+ 0F AE /4 + Save state components specified by EDX:EAX to mem. + + + ModRM:r/m(w) + NA + NA + NA + + + + XSAVEC--Save Processor Extended States with Compaction. + + XSAVEC + mem + 0F C7 /4 + Save state components specified by EDX:EAX to mem with compaction. + + + XSAVEC64 + mem + REX.W+ 0F C7 /4 + Save state components specified by EDX:EAX to mem with compaction. + + + ModRM:r/m(w) + NA + NA + NA + + + + XSAVEOPT--Save Processor Extended States Optimized. + + XSAVEOPT + mem + 0F AE /6 + + XSAVEOPT + + Save state components specified by EDX:EAX to mem, optimizing if possible. + + + XSAVEOPT64 + mem + REX.W + 0F AE /6 + + XSAVEOPT + + Save state components specified by EDX:EAX to mem, optimizing if possible. + + + ModRM:r/m(w) + NA + NA + NA + + + + XSAVES--Save Processor Extended States Supervisor. + + XSAVES + mem + 0F C7 /5 + Save state components specified by EDX:EAX to mem with compaction, optimizing if possible. + + + XSAVES64 + mem + REX.W+ 0F C7 /5 + Save state components specified by EDX:EAX to mem with compaction, optimizing if possible. + + + ModRM:r/m(w) + NA + NA + NA + + + + XSETBV--Set Extended Control Register. + + XSETBV + void + 0F 01 D1 + Write the value in EDX:EAX to the XCR specified by ECX. + + + NA + NA + NA + NA + + + + XTEST--Test If In Transactional Execution. + + XTEST + void + 0F 01 D6 + + HLE + RTM + + Test if executing in a transactional region. + + + NA + NA + NA + NA + + + diff --git a/xml/raw/x86/Intel/AZ_Rules.dtd b/xml/raw/x86/Intel/AZ_Rules.dtd new file mode 100644 index 0000000..4b1e190 --- /dev/null +++ b/xml/raw/x86/Intel/AZ_Rules.dtd @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file