blackjack/xml/raw/x86/Intel/AVX512_r24.xml

<?xml version="1.0" encoding="ASCII"?>
<!DOCTYPE instrs SYSTEM "AVX512_Rules.dtd">
<!-- Copyright (c) 2016 Mahdi Safsafi

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

 -->
<!--   https://github.com/MahdiSafsafi/Parsable-Instructions   -->
<!--
  This XML file includes all instructions found in the
  Intel Architecture Instruction Set Extensions Programming Reference (document 319433-024).
 -->
<!--
****KEY TO ABBREVIATIONS****
  x32m = 32-bit mode support.
  x64m = 64-bit mode support.
  mnem = Instruction Mnemonic.
  args = Instruction Arguments.
  opc  = Opcodes.
  openc = Operand Encoding.
  dscrp = Description.
  oprndenc = Instruction Operand Encoding.
  oprnd1 = Operand 1.
  oprnd2 = Operand 2.
  oprnd3 = Operand 3.
  oprnd4 = Operand 4.

****FOR THE REMAINING KEYS, REFER TO THE INTEL DOCUMENTATION.****
 -->
<instrs version="1.00">
	<!--
		ADDPD / VADDPD group: legacy SSE2, VEX (AVX) and EVEX (AVX-512) forms of
		opcode 58 with the 66 prefix. The openc attribute of each <opc> matches the
		openc attribute of one <oprndenc> row at the end of the group. The EVEX row
		is keyed "FV" so that it matches the EVEX <opc> entries.
	-->
	<common>
		<brief>ADDPD--Add Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>ADDPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 58 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 58 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed double-precision floating-point values from xmm3/mem to xmm2 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 58 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed double-precision floating-point values from ymm3/mem to ymm2 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 58 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed double-precision floating-point values from xmm3/m128/m64bcst to xmm2 and store result in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 58 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed double-precision floating-point values from ymm3/m256/m64bcst to ymm2 and store result in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 58 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed double-precision floating-point values from zmm3/m512/m64bcst to zmm2 and store result in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		ADDPS / VADDPS group: legacy SSE, VEX (AVX) and EVEX (AVX-512) forms of
		opcode 58 with no mandatory prefix. The openc attribute of each <opc> matches
		the openc attribute of one <oprndenc> row at the end of the group. The EVEX
		row is keyed "FV" so that it matches the EVEX <opc> entries.
	-->
	<common>
		<brief>ADDPS--Add Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>ADDPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 58 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Add packed single-precision floating-point values from xmm2/m128 to xmm1 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 58 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed single-precision floating-point values from xmm3/m128 to xmm2 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F.WIG 58 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed single-precision floating-point values from ymm3/m256 to ymm2 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 58 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed single-precision floating-point values from xmm3/m128/m32bcst to xmm2 and store result in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 58 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed single-precision floating-point values from ymm3/m256/m32bcst to ymm2 and store result in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 58 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed single-precision floating-point values from zmm3/m512/m32bcst to zmm2 and store result in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		ADDSD / VADDSD group: scalar double-precision add on opcode 58 with the F2
		prefix, in legacy SSE2, VEX (AVX) and EVEX (AVX512F) forms. The openc attribute
		of each <opc> matches the openc attribute of one <oprndenc> row at the end of
		the group. The EVEX row is keyed "T1S" so that it matches the EVEX <opc> entry.
	-->
	<common>
		<brief>ADDSD--Add Scalar Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>ADDSD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F2 0F 58 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Add the low double-precision floating-point value from xmm2/mem to xmm1 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDSD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.NDS.128.F2.0F.WIG 58 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add the low double-precision floating-point value from xmm3/mem to xmm2 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 58 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add the low double-precision floating-point value from xmm3/m64 to xmm2 and store the result in xmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		ADDSS / VADDSS group: scalar single-precision add on opcode 58 with the F3
		prefix, in legacy SSE, VEX (AVX) and EVEX (AVX512F) forms. The openc attribute
		of each <opc> matches the openc attribute of one <oprndenc> row at the end of
		the group.
	-->
	<common>
		<brief>ADDSS--Add Scalar Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>ADDSS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">F3 0F 58 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Add the low single-precision floating-point value from xmm2/mem to xmm1 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDSS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.NDS.128.F3.0F.WIG 58 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add the low single-precision floating-point value from xmm3/mem to xmm2 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VADDSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 58 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add the low single-precision floating-point value from xmm3/m32 to xmm2 and store the result in xmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VALIGND / VALIGNQ group: EVEX-only forms on opcode 03 in the 0F3A map; W0
		selects the doubleword (D) form and W1 the quadword (Q) form. The 128- and
		256-bit forms list AVX512VL plus AVX512F; the 512-bit forms list AVX512F only.
		NOTE(review): every form takes an imm8 fourth argument and its opcode ends in
		"ib", yet the FV operand-encoding row lists oprnd4 as NA. Confirm against the
		Intel reference whether oprnd4 should read imm8.
	-->
	<common>
		<brief>VALIGND/VALIGNQ--Align Doubleword/Quadword Vectors.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VALIGND</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F3A.W0 03 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift right and merge vectors xmm2 and xmm3/m128/m32bcst with double-word granularity using imm8 as number of elements to shift, and store the final result in xmm1, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VALIGNQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F3A.W1 03 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift right and merge vectors xmm2 and xmm3/m128/m64bcst with quad-word granularity using imm8 as number of elements to shift, and store the final result in xmm1, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VALIGND</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W0 03 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift right and merge vectors ymm2 and ymm3/m256/m32bcst with double-word granularity using imm8 as number of elements to shift, and store the final result in ymm1, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VALIGNQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W1 03 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift right and merge vectors ymm2 and ymm3/m256/m64bcst with quad-word granularity using imm8 as number of elements to shift, and store the final result in ymm1, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VALIGND</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W0 03 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift right and merge vectors zmm2 and zmm3/m512/m32bcst with double-word granularity using imm8 as number of elements to shift, and store the final result in zmm1, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VALIGNQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W1 03 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift right and merge vectors zmm2 and zmm3/m512/m64bcst with quad-word granularity using imm8 as number of elements to shift, and store the final result in zmm1, under writemask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VBLENDMPD / VBLENDMPS group: EVEX-only forms on opcode 65 in the 0F38 map;
		W1 selects the double-precision (PD) form and W0 the single-precision (PS)
		form. The 128- and 256-bit forms list AVX512VL plus AVX512F; the 512-bit
		forms list AVX512F only. All forms share the single FV operand-encoding row.
	-->
	<common>
		<brief>VBLENDMPD/VBLENDMPS--Blend Float64/Float32 Vectors Using an OpMask Control.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VBLENDMPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 65 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend double-precision vector xmm2 and double-precision vector xmm3/m128/m64bcst and store the result in xmm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBLENDMPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 65 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend double-precision vector ymm2 and double-precision vector ymm3/m256/m64bcst and store the result in ymm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBLENDMPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 65 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend double-precision vector zmm2 and double-precision vector zmm3/m512/m64bcst and store the result in zmm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBLENDMPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 65 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend single-precision vector xmm2 and single-precision vector xmm3/m128/m32bcst and store the result in xmm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBLENDMPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 65 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend single-precision vector ymm2 and single-precision vector ymm3/m256/m32bcst and store the result in ymm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBLENDMPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 65 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend single-precision vector zmm2 and single-precision vector zmm3/m512/m32bcst using k1 as select control and store the result in zmm1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VPBLENDMB / VPBLENDMW group: EVEX-only forms on opcode 66 in the 0F38 map;
		W0 selects the byte (B) form and W1 the word (W) form. The 128- and 256-bit
		forms list AVX512VL plus AVX512BW; the 512-bit forms list AVX512BW only.
		These forms use the FVM operand encoding and list no broadcast memory operand.
	-->
	<common>
		<brief>VPBLENDMB/VPBLENDMW--Blend Byte/Word Vectors Using an Opmask Control.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.W0 66 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Blend byte integer vector xmm2 and byte vector xmm3/m128 and store the result in xmm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.W0 66 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Blend byte integer vector ymm2 and byte vector ymm3/m256 and store the result in ymm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.W0 66 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Blend byte integer vector zmm2 and byte vector zmm3/m512 and store the result in zmm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.W1 66 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Blend word integer vector xmm2 and word vector xmm3/m128 and store the result in xmm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.W1 66 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Blend word integer vector ymm2 and word vector ymm3/m256 and store the result in ymm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.W1 66 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Blend word integer vector zmm2 and word vector zmm3/m512 and store the result in zmm1, under control mask.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VPBLENDMD / VPBLENDMQ group: EVEX-only forms on opcode 64 in the 0F38 map;
		W0 selects the doubleword (D) form and W1 the quadword (Q) form. The 128- and
		256-bit forms list AVX512VL plus AVX512F; the 512-bit forms list AVX512F only.
		All forms share the single FV operand-encoding row.
	-->
	<common>
		<brief>VPBLENDMD/VPBLENDMQ--Blend Int32/Int64 Vectors Using an OpMask Control.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 64 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend doubleword integer vector xmm2 and doubleword vector xmm3/m128/m32bcst and store the result in xmm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 64 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend doubleword integer vector ymm2 and doubleword vector ymm3/m256/m32bcst and store the result in ymm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 64 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend doubleword integer vector zmm2 and doubleword vector zmm3/m512/m32bcst and store the result in zmm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 64 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend quadword integer vector xmm2 and quadword vector xmm3/m128/m64bcst and store the result in xmm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 64 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend quadword integer vector ymm2 and quadword vector ymm3/m256/m64bcst and store the result in ymm1, under control mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBLENDMQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 64 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Blend quadword integer vector zmm2 and quadword vector zmm3/m512/m64bcst and store the result in zmm1, under control mask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		ANDPD / VANDPD group: legacy SSE2, VEX (AVX) and EVEX forms of opcode 54 with
		the 66 prefix. The EVEX forms list AVX512DQ (plus AVX512VL for the 128- and
		256-bit forms). The openc attribute of each <opc> matches the openc attribute
		of one <oprndenc> row at the end of the group.
	-->
	<common>
		<brief>ANDPD--Bitwise Logical AND of Packed Double Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>ANDPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 54 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed double-precision floating-point values in xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F 54 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed double-precision floating-point values in xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F 54 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed double-precision floating-point values in ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 54 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed double-precision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 54 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed double-precision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 54 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed double-precision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		ANDPS / VANDPS group: legacy SSE, VEX (AVX) and EVEX forms of opcode 54 with
		no mandatory prefix. The EVEX forms list AVX512DQ (plus AVX512VL for the 128-
		and 256-bit forms). The openc attribute of each <opc> matches the openc
		attribute of one <oprndenc> row at the end of the group.
	-->
	<common>
		<brief>ANDPS--Bitwise Logical AND of Packed Single Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>ANDPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 54 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed single-precision floating-point values in xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F 54 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed single-precision floating-point values in xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F 54 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed single-precision floating-point values in ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 54 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed single-precision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 54 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed single-precision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 54 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND of packed single-precision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		ANDNPD / VANDNPD group: legacy SSE2, VEX (AVX) and EVEX forms of opcode 55
		with the 66 prefix. The EVEX forms list AVX512DQ (plus AVX512VL for the 128-
		and 256-bit forms). The openc attribute of each <opc> matches the openc
		attribute of one <oprndenc> row at the end of the group.
	-->
	<common>
		<brief>ANDNPD--Bitwise Logical AND NOT of Packed Double Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>ANDNPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 55 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed double-precision floating-point values in xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDNPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F 55 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed double-precision floating-point values in xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDNPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F 55 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed double-precision floating-point values in ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDNPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 55 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed double-precision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDNPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 55 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed double-precision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDNPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 55 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed double-precision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		ANDNPS / VANDNPS group: legacy SSE, VEX (AVX) and EVEX forms of opcode 55
		with no mandatory prefix. The EVEX forms list AVX512DQ (plus AVX512VL for the
		128- and 256-bit forms). The EVEX descriptions read "AND NOT", in line with
		the group brief and the SSE/VEX descriptions.
	-->
	<common>
		<brief>ANDNPS--Bitwise Logical AND NOT of Packed Single Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>ANDNPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 55 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed single-precision floating-point values in xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDNPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F 55 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed single-precision floating-point values in xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDNPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F 55 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed single-precision floating-point values in ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDNPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 55 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed single-precision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDNPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 55 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed single-precision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VANDNPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 55 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical AND NOT of packed single-precision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  VBROADCAST group: floating-point broadcast instructions.
		  The first four entries are VEX-encoded (AVX) and use the RM operand
		  encoding; the remaining entries are EVEX-encoded (AVX-512) and take a
		  writemask k1 with optional zeroing {z}. Each EVEX entry names its
		  tuple type (T1S, T2, T4 or T8) in the openc attribute, which must
		  match one of the oprndenc tables at the end of this group.
		-->
		<brief>VBROADCAST--Load with Broadcast Floating-Point Data.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTSS</mnem>
			<args>xmm1,m32</args>
			<opc openc="RM">VEX.128.66.0F38.W0 18 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Broadcast single-precision floating-point element in mem to four locations in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTSS</mnem>
			<args>ymm1,m32</args>
			<opc openc="RM">VEX.256.66.0F38.W0 18 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Broadcast single-precision floating-point element in mem to eight locations in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTSD</mnem>
			<args>ymm1,m64</args>
			<opc openc="RM">VEX.256.66.0F38.W0 19 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Broadcast double-precision floating-point element in mem to four locations in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTF128</mnem>
			<args>ymm1,m128</args>
			<opc openc="RM">VEX.256.66.0F38.W0 1A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Broadcast 128 bits of floating-point data in mem to low and high 128-bits in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTSD</mnem>
			<args>ymm1 {k1}{z},xmm2/m64</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 19 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast low double-precision floating-point element in xmm2/m64 to four locations in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTSD</mnem>
			<args>zmm1 {k1}{z},xmm2/m64</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 19 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast low double-precision floating-point element in xmm2/m64 to eight locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTF32X2</mnem>
			<args>ymm1 {k1}{z},xmm2/m64</args>
			<opc openc="T2">EVEX.256.66.0F38.W0 19 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast two single-precision floating-point elements in xmm2/m64 to locations in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTF32X2</mnem>
			<args>zmm1 {k1}{z},xmm2/m64</args>
			<opc openc="T2">EVEX.512.66.0F38.W0 19 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast two single-precision floating-point elements in xmm2/m64 to locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTSS</mnem>
			<args>xmm1 {k1}{z},xmm2/m32</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 18 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast low single-precision floating-point element in xmm2/m32 to all locations in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTSS</mnem>
			<args>ymm1 {k1}{z},xmm2/m32</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 18 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast low single-precision floating-point element in xmm2/m32 to all locations in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTSS</mnem>
			<args>zmm1 {k1}{z},xmm2/m32</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 18 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast low single-precision floating-point element in xmm2/m32 to all locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTF32X4</mnem>
			<args>ymm1 {k1}{z},m128</args>
			<opc openc="T4">EVEX.256.66.0F38.W0 1A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast 128 bits of 4 single-precision floating-point data in mem to locations in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTF32X4</mnem>
			<args>zmm1 {k1}{z},m128</args>
			<opc openc="T4">EVEX.512.66.0F38.W0 1A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast 128 bits of 4 single-precision floating-point data in mem to locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTF64X2</mnem>
			<args>ymm1 {k1}{z},m128</args>
			<opc openc="T2">EVEX.256.66.0F38.W1 1A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast 128 bits of 2 double-precision floating-point data in mem to locations in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTF64X2</mnem>
			<args>zmm1 {k1}{z},m128</args>
			<opc openc="T2">EVEX.512.66.0F38.W1 1A /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast 128 bits of 2 double-precision floating-point data in mem to locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTF32X8</mnem>
			<args>zmm1 {k1}{z},m256</args>
			<opc openc="T8">EVEX.512.66.0F38.W0 1B /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast 256 bits of 8 single-precision floating-point data in mem to locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTF64X4</mnem>
			<args>zmm1 {k1}{z},m256</args>
			<opc openc="T4">EVEX.512.66.0F38.W1 1B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast 256 bits of 4 double-precision floating-point data in mem to locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<!--
		  Operand encoding tables. Every instruction in this group has two
		  operands: the destination register in ModRM:reg (written) and the
		  source in ModRM:r/m (read). The tuple-type rows (T1S, T2, T4, T8)
		  therefore carry the same operand layout as the RM row; they differ
		  only in the key referenced by the EVEX entries above.
		-->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T2">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T4">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- Key is T8 so that the VBROADCASTF32X8 entry (openc="T8") resolves. -->
		<oprndenc openc="T8">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Broadcast of a value held in a general-purpose register (reg, r32 or
		  r64) into an xmm/ymm/zmm destination under writemask k1 with optional
		  zeroing {z}. Every form is EVEX-encoded in the 0F38 map with the T1S
		  operand encoding: opcode 7A for bytes, 7B for words and 7C for
		  dwords (W0) and qwords (W1).
		  The three VPBROADCASTQ (W1, r64 source) forms carry x32m="NE1" where
		  all other forms carry "V"; refer to the Intel documentation for the
		  exact meaning of these mode-support codes.
		-->
		<brief>VPBROADCASTB/W/D/Q--Load with Broadcast Integer Data from General Purpose Register.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTB</mnem>
			<args>xmm1 {k1}{z},reg</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast an 8-bit value from a GPR to all bytes in the 128-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTB</mnem>
			<args>ymm1 {k1}{z},reg</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast an 8-bit value from a GPR to all bytes in the 256-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTB</mnem>
			<args>zmm1 {k1}{z},reg</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast an 8-bit value from a GPR to all bytes in the 512-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTW</mnem>
			<args>xmm1 {k1}{z},reg</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 7B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast a 16-bit value from a GPR to all words in the 128-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTW</mnem>
			<args>ymm1 {k1}{z},reg</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 7B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast a 16-bit value from a GPR to all words in the 256-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTW</mnem>
			<args>zmm1 {k1}{z},reg</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 7B /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast a 16-bit value from a GPR to all words in the 512-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTD</mnem>
			<args>xmm1 {k1}{z},r32</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 7C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a 32-bit value from a GPR to all double-words in the 128-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTD</mnem>
			<args>ymm1 {k1}{z},r32</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 7C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a 32-bit value from a GPR to all double-words in the 256-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTD</mnem>
			<args>zmm1 {k1}{z},r32</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 7C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a 32-bit value from a GPR to all double-words in the 512-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VPBROADCASTQ</mnem>
			<args>xmm1 {k1}{z},r64</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 7C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a 64-bit value from a GPR to all quad-words in the 128-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VPBROADCASTQ</mnem>
			<args>ymm1 {k1}{z},r64</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 7C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a 64-bit value from a GPR to all quad-words in the 256-bit destination subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VPBROADCASTQ</mnem>
			<args>zmm1 {k1}{z},r64</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 7C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a 64-bit value from a GPR to all quad-words in the 512-bit destination subject to writemask k1.</dscrp>
		</ins>
		<!-- T1S: destination vector register in ModRM:reg (written), source GPR in ModRM:r/m (read). -->
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  VPBROADCAST group: integer broadcast from an xmm register or memory.
		  VEX-encoded entries (AVX2) use the RM operand encoding; EVEX-encoded
		  entries (AVX-512) take a writemask k1 with optional zeroing {z} and
		  name their tuple type (T1S, T2, T4 or T8) in the openc attribute,
		  which must match one of the oprndenc tables at the end of this group.
		  Mnemonics are upper case and masked destinations are written as
		  "reg {k1}{z}", as in the rest of the file.
		-->
		<brief>VPBROADCAST--Load Integer and Broadcast.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTB</mnem>
			<args>xmm1,xmm2/m8</args>
			<opc openc="RM">VEX.128.66.0F38.W0 78 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Broadcast a byte integer in the source operand to sixteen locations in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTB</mnem>
			<args>ymm1,xmm2/m8</args>
			<opc openc="RM">VEX.256.66.0F38.W0 78 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Broadcast a byte integer in the source operand to thirty-two locations in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTB</mnem>
			<args>xmm1 {k1}{z},xmm2/m8</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast a byte integer in the source operand to locations in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTB</mnem>
			<args>ymm1 {k1}{z},xmm2/m8</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast a byte integer in the source operand to locations in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTB</mnem>
			<args>zmm1 {k1}{z},xmm2/m8</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast a byte integer in the source operand to 64 locations in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTW</mnem>
			<args>xmm1,xmm2/m16</args>
			<opc openc="RM">VEX.128.66.0F38.W0 79 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Broadcast a word integer in the source operand to eight locations in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTW</mnem>
			<args>ymm1,xmm2/m16</args>
			<opc openc="RM">VEX.256.66.0F38.W0 79 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Broadcast a word integer in the source operand to sixteen locations in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTW</mnem>
			<args>xmm1 {k1}{z},xmm2/m16</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast a word integer in the source operand to locations in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTW</mnem>
			<args>ymm1 {k1}{z},xmm2/m16</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast a word integer in the source operand to locations in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTW</mnem>
			<args>zmm1 {k1}{z},xmm2/m16</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Broadcast a word integer in the source operand to 32 locations in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTD</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">VEX.128.66.0F38.W0 58 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Broadcast a dword integer in the source operand to four locations in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTD</mnem>
			<args>ymm1,xmm2/m32</args>
			<opc openc="RM">VEX.256.66.0F38.W0 58 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Broadcast a dword integer in the source operand to eight locations in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTD</mnem>
			<args>xmm1 {k1}{z},xmm2/m32</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 58 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a dword integer in the source operand to locations in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTD</mnem>
			<args>ymm1 {k1}{z},xmm2/m32</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 58 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a dword integer in the source operand to locations in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTD</mnem>
			<args>zmm1 {k1}{z},xmm2/m32</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 58 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a dword integer in the source operand to locations in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTQ</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.66.0F38.W0 59 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Broadcast a qword element in source operand to two locations in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTQ</mnem>
			<args>ymm1,xmm2/m64</args>
			<opc openc="RM">VEX.256.66.0F38.W0 59 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Broadcast a qword element in source operand to four locations in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m64</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 59 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a qword element in source operand to locations in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m64</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 59 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a qword element in source operand to locations in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTQ</mnem>
			<args>zmm1 {k1}{z},xmm2/m64</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 59 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast a qword element in source operand to locations in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTI32X2</mnem>
			<args>xmm1 {k1}{z},xmm2/m64</args>
			<opc openc="T2">EVEX.128.66.0F38.W0 59 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast two dword elements in source operand to locations in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTI32X2</mnem>
			<args>ymm1 {k1}{z},xmm2/m64</args>
			<opc openc="T2">EVEX.256.66.0F38.W0 59 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast two dword elements in source operand to locations in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTI32X2</mnem>
			<args>zmm1 {k1}{z},xmm2/m64</args>
			<opc openc="T2">EVEX.512.66.0F38.W0 59 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast two dword elements in source operand to locations in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTI128</mnem>
			<args>ymm1,m128</args>
			<opc openc="RM">VEX.256.66.0F38.W0 5A /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Broadcast 128 bits of integer data in mem to low and high 128-bits in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTI32X4</mnem>
			<args>ymm1 {k1}{z},m128</args>
			<opc openc="T4">EVEX.256.66.0F38.W0 5A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast 128 bits of 4 doubleword integer data in mem to locations in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTI32X4</mnem>
			<args>zmm1 {k1}{z},m128</args>
			<opc openc="T4">EVEX.512.66.0F38.W0 5A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast 128 bits of 4 doubleword integer data in mem to locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTI64X2</mnem>
			<args>ymm1 {k1}{z},m128</args>
			<opc openc="T2">EVEX.256.66.0F38.W1 5A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast 128 bits of 2 quadword integer data in mem to locations in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTI64X2</mnem>
			<args>zmm1 {k1}{z},m128</args>
			<opc openc="T2">EVEX.512.66.0F38.W1 5A /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast 128 bits of 2 quadword integer data in mem to locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTI32X8</mnem>
			<args>zmm1 {k1}{z},m256</args>
			<opc openc="T8">EVEX.512.66.0F38.W0 5B /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Broadcast 256 bits of 8 doubleword integer data in mem to locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VBROADCASTI64X4</mnem>
			<args>zmm1 {k1}{z},m256</args>
			<opc openc="T4">EVEX.512.66.0F38.W1 5B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Broadcast 256 bits of 4 quadword integer data in mem to locations in zmm1 using writemask k1.</dscrp>
		</ins>
		<!--
		  Operand encoding tables. Every instruction in this group has two
		  operands: the destination register in ModRM:reg (written) and the
		  source in ModRM:r/m (read). The tuple-type rows (T1S, T2, T4, T8)
		  therefore carry the same operand layout as the RM row; they differ
		  only in the key referenced by the EVEX entries above.
		-->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T2">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T4">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- Key is T8 so that the VBROADCASTI32X8 entry (openc="T8") resolves. -->
		<oprndenc openc="T8">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Packed double-precision compare. One legacy SSE2 form, two VEX (AVX)
		  forms and three EVEX (AVX-512) forms; every form uses opcode
		  C2 /r ib and takes the comparison predicate from imm8.
		  Per the descriptions, the SSE2 form uses imm8 bits 2:0 while the
		  VEX and EVEX forms use bits 4:0. The EVEX forms leave the result in
		  mask register k1 under writemask k2; only the 512-bit form lists
		  {sae} in its arguments.
		-->
		<brief>CMPPD--Compare Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CMPPD</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">66 0F C2 /r ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed double-precision floating-point values in xmm2/m128 and xmm1 using bits 2:0 of imm8 as a comparison predicate.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPPD</mnem>
			<args>xmm1,xmm2,xmm3/m128,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.66.0F.WIG C2 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed double-precision floating-point values in xmm3/m128 and xmm2 using bits 4:0 of imm8 as a comparison predicate.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPPD</mnem>
			<args>ymm1,ymm2,ymm3/m256,imm8</args>
			<opc openc="RVMI">VEX.NDS.256.66.0F.WIG C2 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed double-precision floating-point values in ymm3/m256 and ymm2 using bits 4:0 of imm8 as a comparison predicate.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPPD</mnem>
			<args>k1 {k2},xmm2,xmm3/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 C2 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed double-precision floating-point values in xmm3/m128/m64bcst and xmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPPD</mnem>
			<args>k1 {k2},ymm2,ymm3/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 C2 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed double-precision floating-point values in ymm3/m256/m64bcst and ymm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPPD</mnem>
			<args>k1 {k2},zmm2,zmm3/m512/m64bcst{sae},imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 C2 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed double-precision floating-point values in zmm3/m512/m64bcst and zmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<!-- RMI (legacy SSE2): operand 1 is both source and destination; imm8 is operand 3. -->
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- RVMI (VEX): separate destination; first source comes from VEX.vvvv; imm8 is operand 4. -->
		<oprndenc openc="RVMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<!-- FV (EVEX): same layout as RVMI with the first source taken from EVEX.vvvv. -->
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Packed single-precision compare. One legacy SSE form, two VEX (AVX)
		  forms and three EVEX (AVX-512) forms; every form uses opcode
		  C2 /r ib with no mandatory prefix and takes the comparison predicate
		  from imm8.
		  Per the descriptions, the SSE form uses imm8 bits 2:0 while the VEX
		  and EVEX forms use bits 4:0. The EVEX forms leave the result in mask
		  register k1 under writemask k2; only the 512-bit form lists {sae}
		  in its arguments.
		-->
		<brief>CMPPS--Compare Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CMPPS</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">0F C2 /r ib</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Compare packed single-precision floating-point values in xmm2/m128 and xmm1 using bits 2:0 of imm8 as a comparison predicate.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPPS</mnem>
			<args>xmm1,xmm2,xmm3/m128,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.0F.WIG C2 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed single-precision floating-point values in xmm3/m128 and xmm2 using bits 4:0 of imm8 as a comparison predicate.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPPS</mnem>
			<args>ymm1,ymm2,ymm3/m256,imm8</args>
			<opc openc="RVMI">VEX.NDS.256.0F.WIG C2 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed single-precision floating-point values in ymm3/m256 and ymm2 using bits 4:0 of imm8 as a comparison predicate.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPPS</mnem>
			<args>k1 {k2},xmm2,xmm3/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 C2 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed single-precision floating-point values in xmm3/m128/m32bcst and xmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPPS</mnem>
			<args>k1 {k2},ymm2,ymm3/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 C2 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed single-precision floating-point values in ymm3/m256/m32bcst and ymm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPPS</mnem>
			<args>k1 {k2},zmm2,zmm3/m512/m32bcst{sae},imm8</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 C2 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed single-precision floating-point values in zmm3/m512/m32bcst and zmm2 using bits 4:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<!-- RMI (legacy SSE): operand 1 is both source and destination; imm8 is operand 3. -->
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- RVMI (VEX): separate destination; first source comes from VEX.vvvv; imm8 is operand 4. -->
		<oprndenc openc="RVMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<!-- FV (EVEX): same layout as RVMI with the first source taken from EVEX.vvvv. -->
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Scalar double-precision compare of the low element. One legacy SSE2
		  form, one VEX (AVX) form and one EVEX (AVX-512F) form; every form
		  uses the F2 prefix with opcode C2 /r ib and takes the comparison
		  predicate from imm8 (bits 2:0 for SSE2, bits 4:0 for VEX and EVEX,
		  per the descriptions). The EVEX form is vector-length ignored (LIG),
		  accepts {sae} and leaves the result in mask register k1 under
		  writemask k2.
		-->
		<brief>CMPSD--Compare Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CMPSD</mnem>
			<args>xmm1,xmm2/m64,imm8</args>
			<opc openc="RMI">F2 0F C2 /r ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare low double-precision floating-point value in xmm2/m64 and xmm1 using bits 2:0 of imm8 as comparison predicate.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPSD</mnem>
			<args>xmm1,xmm2,xmm3/m64,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.F2.0F.WIG C2 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare low double-precision floating-point value in xmm3/m64 and xmm2 using bits 4:0 of imm8 as comparison predicate.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPSD</mnem>
			<args>k1 {k2},xmm2,xmm3/m64{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 C2 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare low double-precision floating-point value in xmm3/m64 and xmm2 using bits 4:0 of imm8 as comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<!-- RMI (legacy SSE2): operand 1 is both source and destination; imm8 is operand 3. -->
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- RVMI (VEX): separate destination; first source comes from VEX.vvvv; imm8 is operand 4. -->
		<oprndenc openc="RVMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<!-- T1S (EVEX): same layout as RVMI with the first source taken from EVEX.vvvv. -->
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Scalar single-precision compare of the low element. One legacy SSE
		  form, one VEX (AVX) form and one EVEX (AVX-512F) form; every form
		  uses the F3 prefix with opcode C2 /r ib and takes the comparison
		  predicate from imm8 (bits 2:0 for SSE, bits 4:0 for VEX and EVEX,
		  per the descriptions). The EVEX form is vector-length ignored (LIG),
		  accepts {sae} and leaves the result in mask register k1 under
		  writemask k2.
		-->
		<brief>CMPSS--Compare Scalar Single-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CMPSS</mnem>
			<args>xmm1,xmm2/m32,imm8</args>
			<opc openc="RMI">F3 0F C2 /r ib</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Compare low single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of imm8 as comparison predicate.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPSS</mnem>
			<args>xmm1,xmm2,xmm3/m32,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.F3.0F.WIG C2 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare low single-precision floating-point value in xmm3/m32 and xmm2 using bits 4:0 of imm8 as comparison predicate.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCMPSS</mnem>
			<args>k1 {k2},xmm2,xmm3/m32{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 C2 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare low single-precision floating-point value in xmm3/m32 and xmm2 using bits 4:0 of imm8 as comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<!-- RMI (legacy SSE): operand 1 is both source and destination; imm8 is operand 3. -->
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- RVMI (VEX): separate destination; first source comes from VEX.vvvv; imm8 is operand 4. -->
		<oprndenc openc="RVMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<!-- T1S (EVEX): same layout as RVMI with the first source taken from EVEX.vvvv. -->
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Ordered scalar double-precision compare that reports its result in
		  EFLAGS. One legacy SSE2 form, one VEX (AVX) form and one EVEX
		  (AVX-512F) form, all with the 66 prefix and opcode 2F /r. The EVEX
		  form is vector-length ignored (LIG) and accepts {sae}.
		-->
		<brief>COMISD--Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS.</brief>
		<ins x32m="V" x64m="V">
			<mnem>COMISD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">66 0F 2F /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCOMISD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.66.0F.WIG 2F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCOMISD</mnem>
			<args>xmm1,xmm2/m64{sae}</args>
			<opc openc="T1S">EVEX.LIG.66.0F.W1 2F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<!--
		  NOTE(review): both tables flag operand 1 as written (w), yet the
		  descriptions above only say xmm1 is compared and EFLAGS is set.
		  Presumably this mirrors the source manual; confirm against the Intel
		  operand encoding table before relying on this access flag.
		-->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Ordered scalar single-precision compare that reports its result in
		  EFLAGS. One legacy SSE form, one VEX (AVX) form and one EVEX
		  (AVX-512F) form, all with opcode 2F /r and no mandatory prefix. The
		  EVEX form is vector-length ignored (LIG) and accepts {sae}.
		-->
		<brief>COMISS--Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS.</brief>
		<ins x32m="V" x64m="V">
			<mnem>COMISS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">0F 2F /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCOMISS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">VEX.128.0F.WIG 2F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCOMISS</mnem>
			<args>xmm1,xmm2/m32{sae}</args>
			<opc openc="T1S">EVEX.LIG.0F.W0 2F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<!--
		  NOTE(review): both tables flag operand 1 as written (w), yet the
		  descriptions above only say xmm1 is compared and EFLAGS is set.
		  Presumably this mirrors the source manual; confirm against the Intel
		  operand encoding table before relying on this access flag.
		-->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>DIVPD--Divide Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>DIVPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 5E /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Divide packed double-precision floating-point values in xmm1 by packed double-precision floating-point values in xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 5E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Divide packed double-precision floating-point values in xmm2 by packed double-precision floating-point values in xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 5E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Divide packed double-precision floating-point values in ymm2 by packed double-precision floating-point values in ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 5E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Divide packed double-precision floating-point values in xmm2 by packed double-precision floating-point values in xmm3/m128/m64bcst and write results to xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 5E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Divide packed double-precision floating-point values in ymm2 by packed double-precision floating-point values in ymm3/m256/m64bcst and write results to ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 5E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Divide packed double-precision floating-point values in zmm2 by packed double-precision FP values in zmm3/m512/m64bcst and write results to zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>DIVPS--Divide Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>DIVPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 5E /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Divide packed single-precision floating-point values in xmm1 by packed single-precision floating-point values in xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 5E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Divide packed single-precision floating-point values in xmm2 by packed single-precision floating-point values in xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F.WIG 5E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Divide packed single-precision floating-point values in ymm2 by packed single-precision floating-point values in ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 5E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Divide packed single-precision floating-point values in xmm2 by packed single-precision floating-point values in xmm3/m128/m32bcst and write results to xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 5E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Divide packed single-precision floating-point values in ymm2 by packed single-precision floating-point values in ymm3/m256/m32bcst and write results to ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 5E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Divide packed single-precision floating-point values in zmm2 by packed single-precision floating-point values in zmm3/m512/m32bcst and write results to zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>DIVSD--Divide Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>DIVSD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F2 0F 5E /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Divide low double-precision floating-point value in xmm1 by low double-precision floating-point value in xmm2/m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVSD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.NDS.128.F2.0F.WIG 5E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Divide low double-precision floating-point value in xmm2 by low double-precision floating-point value in xmm3/m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 5E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Divide low double-precision floating-point value in xmm2 by low double-precision floating-point value in xmm3/m64.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>DIVSS--Divide Scalar Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>DIVSS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">F3 0F 5E /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Divide low single-precision floating-point value in xmm1 by low single-precision floating-point value in xmm2/m32.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVSS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.NDS.128.F3.0F.WIG 5E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Divide low single-precision floating-point value in xmm2 by low single-precision floating-point value in xmm3/m32.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDIVSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 5E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Divide low single-precision floating-point value in xmm2 by low single-precision floating-point value in xmm3/m32.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VCOMPRESSPD--Store Sparse Packed Double-Precision Floating-Point Values into Dense Memory.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCOMPRESSPD</mnem>
			<args>xmm1/m128 {k1}{z},xmm2</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 8A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed double-precision floating-point values from xmm2 to xmm1/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCOMPRESSPD</mnem>
			<args>ymm1/m256 {k1}{z},ymm2</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 8A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed double-precision floating-point values from ymm2 to ymm1/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCOMPRESSPD</mnem>
			<args>zmm1/m512 {k1}{z},zmm2</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 8A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed double-precision floating-point values from zmm2 using control mask k1 to zmm1/m512.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VCOMPRESSPS--Store Sparse Packed Single-Precision Floating-Point Values into Dense Memory.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCOMPRESSPS</mnem>
			<args>xmm1/m128 {k1}{z},xmm2</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 8A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed single-precision floating-point values from xmm2 to xmm1/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCOMPRESSPS</mnem>
			<args>ymm1/m256 {k1}{z},ymm2</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 8A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed single-precision floating-point values from ymm2 to ymm1/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCOMPRESSPS</mnem>
			<args>zmm1/m512 {k1}{z},zmm2</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 8A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed single-precision floating-point values from zmm2 using control mask k1 to zmm1/m512.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>CVTDQ2PD--Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTDQ2PD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F3 0F E6 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert two packed signed doubleword integers from xmm2/mem to two packed double-precision floating-point values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTDQ2PD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.F3.0F.WIG E6 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert two packed signed doubleword integers from xmm2/mem to two packed double-precision floating-point values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTDQ2PD</mnem>
			<args>ymm1,xmm2/m128</args>
			<opc openc="RM">VEX.256.F3.0F.WIG E6 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert four packed signed doubleword integers from xmm2/mem to four packed double-precision floating-point values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTDQ2PD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="HV">EVEX.128.F3.0F.W0 E6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert 2 packed signed doubleword integers from xmm2/m128/m32bcst to 2 packed double-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTDQ2PD</mnem>
			<args>ymm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="HV">EVEX.256.F3.0F.W0 E6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert 4 packed signed doubleword integers from xmm2/m128/m32bcst to 4 packed double-precision floating-point values in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTDQ2PD</mnem>
			<args>zmm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="HV">EVEX.512.F3.0F.W0 E6 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed signed doubleword integers from ymm2/m256/m32bcst to eight packed double-precision floating-point values in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="HV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>CVTDQ2PS--Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTDQ2PS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 5B /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision floating-point values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTDQ2PS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.0F.WIG 5B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision floating-point values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTDQ2PS</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.0F.WIG 5B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert eight packed signed doubleword integers from ymm2/mem to eight packed single-precision floating-point values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTDQ2PS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.0F.W0 5B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed signed doubleword integers from xmm2/m128/m32bcst to four packed single-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTDQ2PS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.0F.W0 5B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed signed doubleword integers from ymm2/m256/m32bcst to eight packed single-precision floating-point values in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTDQ2PS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.512.0F.W0 5B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert sixteen packed signed doubleword integers from zmm2/m512/m32bcst to sixteen packed single-precision floating-point values in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>CVTPD2DQ--Convert Packed Double-Precision Floating-Point Values to Packed Doubleword Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTPD2DQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">F2 0F E6 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2DQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.F2.0F.WIG E6 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2DQ</mnem>
			<args>xmm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.F2.0F.WIG E6 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values in ymm2/mem to four signed doubleword integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2DQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.F2.0F.W1 E6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two signed doubleword integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2DQ</mnem>
			<args>xmm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.F2.0F.W1 E6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four signed doubleword integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2DQ</mnem>
			<args>ymm1 {k1}{z},zmm2/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.512.F2.0F.W1 E6 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight signed doubleword integers in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>CVTPD2PS--Convert Packed Double-Precision Floating-Point Values to Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTPD2PS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 5A /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/mem to two single-precision floating-point values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2PS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.66.0F.WIG 5A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/mem to two single-precision floating-point values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2PS</mnem>
			<args>xmm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.66.0F.WIG 5A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values in ymm2/mem to four single-precision floating-point values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2PS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F.W1 5A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two single-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2PS</mnem>
			<args>xmm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F.W1 5A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four single-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2PS</mnem>
			<args>ymm1 {k1}{z},zmm2/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.512.66.0F.W1 5A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight single-precision floating-point values in ymm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VCVTPD2QQ--Convert Packed Double-Precision Floating-Point Values to Packed Quadword Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2QQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F.W1 7B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values from xmm2/m128/m64bcst to two packed quadword integers in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2QQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F.W1 7B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values from ymm2/m256/m64bcst to four packed quadword integers in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2QQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.512.66.0F.W1 7B /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed double-precision floating-point values from zmm2/m512/m64bcst to eight packed quadword integers in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VCVTPD2UDQ--Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2UDQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.0F.W1 79 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two unsigned doubleword integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2UDQ</mnem>
			<args>xmm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.0F.W1 79 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four unsigned doubleword integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2UDQ</mnem>
			<args>ymm1 {k1}{z},zmm2/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.512.0F.W1 79 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight unsigned doubleword integers in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VCVTPD2UQQ--Convert Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2UQQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F.W1 79 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values from xmm2/mem to two packed unsigned quadword integers in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2UQQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F.W1 79 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values from ymm2/mem to four packed unsigned quadword integers in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPD2UQQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.512.66.0F.W1 79 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed double-precision floating-point values from zmm2/mem to eight packed unsigned quadword integers in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VCVTPH2PS--Convert 16-bit FP values to Single-Precision FP values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPH2PS</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.66.0F38.W0 13 /r</opc>
			<cpuid>
				<flag>F16C</flag>
			</cpuid>
			<dscrp>Convert four packed half precision (16-bit) floating-point values in xmm2/m64 to packed single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPH2PS</mnem>
			<args>ymm1,xmm2/m128</args>
			<opc openc="RM">VEX.256.66.0F38.W0 13 /r</opc>
			<cpuid>
				<flag>F16C</flag>
			</cpuid>
			<dscrp>Convert eight packed half precision (16-bit) floating-point values in xmm2/m128 to packed single-precision floating-point value in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPH2PS</mnem>
			<args>xmm1 {k1}{z},xmm2/m64</args>
			<opc openc="HVM">EVEX.128.66.0F38.W0 13 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed half precision (16-bit) floating-point values in xmm2/m64 to packed single-precision floating-point values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPH2PS</mnem>
			<args>ymm1 {k1}{z},xmm2/m128</args>
			<opc openc="HVM">EVEX.256.66.0F38.W0 13 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed half precision (16-bit) floating-point values in xmm2/m128 to packed single-precision floating-point values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPH2PS</mnem>
			<args>zmm1 {k1}{z},ymm2/m256 {sae}</args>
			<opc openc="HVM">EVEX.512.66.0F38.W0 13 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert sixteen packed half precision (16-bit) floating-point values in ymm2/m256 to packed single-precision floating-point values in zmm1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="HVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VCVTPS2PH--Convert Single-Precision FP value to 16-bit FP value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2PH</mnem>
			<args>xmm1/m64,xmm2,imm8</args>
			<opc openc="MRI">VEX.128.66.0F3A.W0 1D /r ib</opc>
			<cpuid>
				<flag>F16C</flag>
			</cpuid>
			<dscrp>Convert four packed single-precision floating-point values in xmm2 to packed half-precision (16-bit) floating-point values in xmm1/m64. Imm8 provides rounding controls.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2PH</mnem>
			<args>xmm1/m128,ymm2,imm8</args>
			<opc openc="MRI">VEX.256.66.0F3A.W0 1D /r ib</opc>
			<cpuid>
				<flag>F16C</flag>
			</cpuid>
			<dscrp>Convert eight packed single-precision floating-point values in ymm2 to packed half-precision (16-bit) floating-point values in xmm1/m128. Imm8 provides rounding controls.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2PH</mnem>
			<args>xmm1/m64 {k1}{z},xmm2,imm8</args>
			<opc openc="HVM">EVEX.128.66.0F3A.W0 1D /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed single-precision floating-point values in xmm2 to packed half-precision (16-bit) floating-point values in xmm1/m64. Imm8 provides rounding controls.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2PH</mnem>
			<args>xmm1/m128 {k1}{z},ymm2,imm8</args>
			<opc openc="HVM">EVEX.256.66.0F3A.W0 1D /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed single-precision floating-point values in ymm2 to packed half-precision (16-bit) floating-point values in xmm1/m128. Imm8 provides rounding controls.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2PH</mnem>
			<args>ymm1/m256 {k1}{z},zmm2{sae},imm8</args>
			<opc openc="HVM">EVEX.512.66.0F3A.W0 1D /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert sixteen packed single-precision floating-point values in zmm2 to packed half-precision (16-bit) floating-point values in ymm1/m256. Imm8 provides rounding controls.</dscrp>
		</ins>
		<oprndenc openc="MRI">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="HVM">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- CVTPS2DQ / VCVTPS2DQ: packed single-precision to packed signed doubleword conversion.
	     One SSE2 form, two VEX (AVX) forms and three EVEX (AVX512F) forms, all on opcode byte 5B with the 66 prefix.
	     The 128- and 256-bit EVEX forms additionally list AVX512VL; only the 512-bit form accepts {er}.
	     Both operand encodings used here (RM and FV) write ModRM:reg and read ModRM:r/m. -->
	<common>
		<brief>CVTPS2DQ--Convert Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTPS2DQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 5B /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2DQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.66.0F.WIG 5B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2DQ</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.66.0F.WIG 5B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert eight packed single-precision floating-point values from ymm2/mem to eight packed signed doubleword values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2DQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.66.0F.W0 5B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed doubleword values in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2DQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.66.0F.W0 5B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed doubleword values in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2DQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.512.66.0F.W0 5B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert sixteen packed single-precision floating-point values from zmm2/m512/m32bcst to sixteen packed signed doubleword values in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VCVTPS2UDQ: packed single-precision to packed unsigned doubleword conversion.
	     EVEX-only entry with 128/256/512-bit forms on opcode byte 79 (W0, no SIMD prefix in the opcode string).
	     The 128- and 256-bit forms list AVX512VL plus AVX512F; only the 512-bit form accepts {er}.
	     Single FV operand encoding: ModRM:reg is written, ModRM:r/m is read. -->
	<common>
		<brief>VCVTPS2UDQ--Convert Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2UDQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.0F.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned doubleword values in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2UDQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.0F.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned doubleword values in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2UDQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.512.0F.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert sixteen packed single-precision floating-point values from zmm2/m512/m32bcst to sixteen packed unsigned doubleword values in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VCVTPS2QQ: packed single-precision to packed signed quadword conversion.
	     EVEX-only entry (AVX512DQ, plus AVX512VL for the 128/256-bit forms) on opcode byte 7B with the 66 prefix.
	     The source operand is half the width of the destination (m64/xmm2/ymm2 feeding xmm1/ymm1/zmm1);
	     only the 512-bit form accepts {er}. HV operand encoding: ModRM:reg written, ModRM:r/m read. -->
	<common>
		<brief>VCVTPS2QQ--Convert Packed Single Precision Floating-Point Values to Packed Signed Quadword Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2QQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m64/m32bcst</args>
			<opc openc="HV">EVEX.128.66.0F.W0 7B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed single precision floating-point values from xmm2/m64/m32bcst to two packed signed quadword values in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2QQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="HV">EVEX.256.66.0F.W0 7B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed quadword values in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2QQ</mnem>
			<args>zmm1 {k1}{z},ymm2/m256/m32bcst{er}</args>
			<opc openc="HV">EVEX.512.66.0F.W0 7B /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed quadword values in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="HV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VCVTPS2UQQ: packed single-precision to packed unsigned quadword conversion.
	     EVEX-only entry (AVX512DQ, plus AVX512VL for the 128/256-bit forms) on opcode byte 79 with the 66 prefix.
	     The source operand is half the width of the destination; only the 512-bit form accepts {er}.
	     HV operand encoding: ModRM:reg written, ModRM:r/m read. -->
	<common>
		<brief>VCVTPS2UQQ--Convert Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2UQQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m64/m32bcst</args>
			<opc openc="HV">EVEX.128.66.0F.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed single precision floating-point values from xmm2/m64/m32bcst to two packed unsigned quadword values in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2UQQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="HV">EVEX.256.66.0F.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned quadword values in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2UQQ</mnem>
			<args>zmm1 {k1}{z},ymm2/m256/m32bcst{er}</args>
			<opc openc="HV">EVEX.512.66.0F.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned quadword values in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="HV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- CVTPS2PD / VCVTPS2PD: packed single-precision to packed double-precision conversion.
	     One SSE2 form, two VEX (AVX) forms and three EVEX forms, all on opcode byte 5A with no SIMD prefix.
	     The source operand is half the width of the destination. The 128- and 256-bit EVEX forms require
	     AVX512VL together with AVX512F; the 512-bit form accepts {sae}.
	     Both operand encodings used here (RM and HV) write ModRM:reg and read ModRM:r/m. -->
	<common>
		<brief>CVTPS2PD--Convert Packed Single-Precision Floating-Point Values to Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTPS2PD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">0F 5A /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert two packed single-precision floating-point values in xmm2/m64 to two packed double-precision floating-point values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2PD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.0F.WIG 5A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert two packed single-precision floating-point values in xmm2/m64 to two packed double-precision floating-point values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2PD</mnem>
			<args>ymm1,xmm2/m128</args>
			<opc openc="RM">VEX.256.0F.WIG 5A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert four packed single-precision floating-point values in xmm2/m128 to four packed double-precision floating-point values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2PD</mnem>
			<args>xmm1 {k1}{z},xmm2/m64/m32bcst</args>
			<opc openc="HV">EVEX.128.0F.W0 5A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert two packed single-precision floating-point values in xmm2/m64/m32bcst to packed double-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2PD</mnem>
			<args>ymm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="HV">EVEX.256.0F.W0 5A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed single-precision floating-point values in xmm2/m128/m32bcst to packed double-precision floating-point values in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTPS2PD</mnem>
			<args>zmm1 {k1}{z},ymm2/m256/m32bcst{sae}</args>
			<opc openc="HV">EVEX.512.0F.W0 5A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed single-precision floating-point values in ymm2/m256/m32bcst to eight packed double-precision floating-point values in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="HV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VCVTQQ2PD: packed quadword integer to packed double-precision conversion.
	     EVEX-only entry (AVX512DQ, plus AVX512VL for the 128/256-bit forms) on opcode byte E6 with the F3 prefix and W1.
	     Source and destination have the same vector width; only the 512-bit form accepts {er}.
	     FV operand encoding: ModRM:reg written, ModRM:r/m read. -->
	<common>
		<brief>VCVTQQ2PD--Convert Packed Quadword Integers to Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTQQ2PD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.F3.0F.W1 E6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed quadword integers from xmm2/m128/m64bcst to packed double-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTQQ2PD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.F3.0F.W1 E6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed quadword integers from ymm2/m256/m64bcst to packed double-precision floating-point values in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTQQ2PD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.512.F3.0F.W1 E6 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed quadword integers from zmm2/m512/m64bcst to eight packed double-precision floating-point values in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VCVTQQ2PS: packed quadword integer to packed single-precision conversion.
	     EVEX-only entry (AVX512DQ, plus AVX512VL for the 128/256-bit forms) on opcode byte 5B with W1 and no SIMD prefix.
	     The destination register is narrower than the source for the 256- and 512-bit forms
	     (xmm1 from ymm2, ymm1 from zmm2); only the 512-bit form accepts {er}.
	     FV operand encoding: ModRM:reg written, ModRM:r/m read. -->
	<common>
		<brief>VCVTQQ2PS--Convert Packed Quadword Integers to Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTQQ2PS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.0F.W1 5B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed quadword integers from xmm2/mem to packed single-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTQQ2PS</mnem>
			<args>xmm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.0F.W1 5B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed quadword integers from ymm2/mem to packed single-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTQQ2PS</mnem>
			<args>ymm1 {k1}{z},zmm2/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.512.0F.W1 5B /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed quadword integers from zmm2/mem to eight packed single-precision floating-point values in ymm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- CVTSD2SI / VCVTSD2SI: scalar double-precision to signed integer conversion into a general-purpose register.
	     Each encoding family (SSE2, VEX/AVX, EVEX/AVX512F) has an r32 form and an r64 form on opcode byte 2D with the F2 prefix;
	     the r64 forms are selected by REX.W / W1 and are marked x32m NE or NE1 (see the Intel documentation for these keys).
	     The EVEX forms accept {er}. Both operand encodings used here (RM and T1F) write ModRM:reg and read ModRM:r/m. -->
	<common>
		<brief>CVTSD2SI--Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTSD2SI</mnem>
			<args>r32,xmm1/m64</args>
			<opc openc="RM">F2 0F 2D /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer r32.</dscrp>
		</ins>
		<ins x32m="NE" x64m="V">
			<mnem>CVTSD2SI</mnem>
			<args>r64,xmm1/m64</args>
			<opc openc="RM">F2 REX.W 0F 2D /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer sign-extended into r64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSD2SI</mnem>
			<args>r32,xmm1/m64</args>
			<opc openc="RM">VEX.128.F2.0F.W0 2D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer r32.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTSD2SI</mnem>
			<args>r64,xmm1/m64</args>
			<opc openc="RM">VEX.128.F2.0F.W1 2D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer sign-extended into r64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSD2SI</mnem>
			<args>r32,xmm1/m64{er}</args>
			<opc openc="T1F">EVEX.LIG.F2.0F.W0 2D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer r32.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTSD2SI</mnem>
			<args>r64,xmm1/m64{er}</args>
			<opc openc="T1F">EVEX.LIG.F2.0F.W1 2D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer sign-extended into r64.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1F">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VCVTSD2USI: scalar double-precision to unsigned integer conversion into a general-purpose register.
	     EVEX-only entry (AVX512F) on opcode byte 79 with the F2 prefix: W0 targets r32, W1 targets r64
	     (the r64 form is marked x32m NE1; see the Intel documentation for this key). Both forms accept {er}.
	     T1F operand encoding: ModRM:reg written, ModRM:r/m read. -->
	<common>
		<brief>VCVTSD2USI--Convert Scalar Double-Precision Floating-Point Value to Unsigned Doubleword Integer.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSD2USI</mnem>
			<args>r32,xmm1/m64{er}</args>
			<opc openc="T1F">EVEX.LIG.F2.0F.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one unsigned doubleword integer r32.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTSD2USI</mnem>
			<args>r64,xmm1/m64{er}</args>
			<opc openc="T1F">EVEX.LIG.F2.0F.W1 79 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one unsigned quadword integer zero-extended into r64.</dscrp>
		</ins>
		<oprndenc openc="T1F">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- CVTSD2SS / VCVTSD2SS: scalar double-precision to scalar single-precision conversion on opcode byte 5A with the F2 prefix.
	     The SSE2 form is two-operand (RM). The VEX and EVEX forms are three-operand: operand 2 comes from
	     VEX.vvvv / EVEX.vvvv and operand 3 from ModRM:r/m (RVM / T1S). The EVEX form adds {k1}{z} masking and {er}. -->
	<common>
		<brief>CVTSD2SS--Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTSD2SS</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F2 0F 5A /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value in xmm2/m64 to one single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSD2SS</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.NDS.128.F2.0F.WIG 5A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value in xmm3/m64 to one single-precision floating-point value and merge with high bits in xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSD2SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 5A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value in xmm3/m64 to one single-precision floating-point value and merge with high bits in xmm2 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- CVTSI2SD / VCVTSI2SD: signed integer (32- or 64-bit) to scalar double-precision conversion on opcode byte 2A with the F2 prefix.
	     Each encoding family (SSE2, VEX/AVX, EVEX/AVX512F) has a 32-bit and a 64-bit source form; the 64-bit forms are
	     selected by REX.W / W1 and are marked x32m NE or NE1 (see the Intel documentation for these keys).
	     Of the two EVEX forms, only the W1 (r/m64) form is listed with {er}.
	     SSE2 forms are two-operand (RM); VEX and EVEX forms take operand 2 from VEX.vvvv / EVEX.vvvv (RVM / T1S). -->
	<common>
		<brief>CVTSI2SD--Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTSI2SD</mnem>
			<args>xmm1,r32/m32</args>
			<opc openc="RM">F2 0F 2A /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert one signed doubleword integer from r32/m32 to one double-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="NE" x64m="V">
			<mnem>CVTSI2SD</mnem>
			<args>xmm1,r/m64</args>
			<opc openc="RM">F2 REX.W 0F 2A /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSI2SD</mnem>
			<args>xmm1,xmm2,r/m32</args>
			<opc openc="RVM">VEX.NDS.128.F2.0F.W0 2A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one signed doubleword integer from r/m32 to one double-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTSI2SD</mnem>
			<args>xmm1,xmm2,r/m64</args>
			<opc openc="RVM">VEX.NDS.128.F2.0F.W1 2A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSI2SD</mnem>
			<args>xmm1,xmm2,r/m32</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W0 2A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one signed doubleword integer from r/m32 to one double-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTSI2SD</mnem>
			<args>xmm1,xmm2,r/m64{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 2A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one signed quadword integer from r/m64 to one double-precision floating-point value in xmm1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- CVTSI2SS / VCVTSI2SS: signed integer (32- or 64-bit) to scalar single-precision conversion on opcode byte 2A with the F3 prefix.
	     Each encoding family (SSE, VEX/AVX, EVEX/AVX512F) has a 32-bit and a 64-bit source form; the 64-bit forms are
	     selected by REX.W / W1 and are marked x32m NE or NE1 (see the Intel documentation for these keys).
	     Both EVEX forms are listed with {er}.
	     SSE forms are two-operand (RM); VEX and EVEX forms take operand 2 from VEX.vvvv / EVEX.vvvv (RVM / T1S). -->
	<common>
		<brief>CVTSI2SS--Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTSI2SS</mnem>
			<args>xmm1,r/m32</args>
			<opc openc="RM">F3 0F 2A /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="NE" x64m="V">
			<mnem>CVTSI2SS</mnem>
			<args>xmm1,r/m64</args>
			<opc openc="RM">F3 REX.W 0F 2A /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSI2SS</mnem>
			<args>xmm1,xmm2,r/m32</args>
			<opc openc="RVM">VEX.NDS.128.F3.0F.W0 2A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTSI2SS</mnem>
			<args>xmm1,xmm2,r/m64</args>
			<opc openc="RVM">VEX.NDS.128.F3.0F.W1 2A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSI2SS</mnem>
			<args>xmm1,xmm2,r/m32{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 2A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one signed doubleword integer from r/m32 to one single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTSI2SS</mnem>
			<args>xmm1,xmm2,r/m64{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W1 2A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one signed quadword integer from r/m64 to one single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- CVTSS2SD / VCVTSS2SD: scalar single-precision to scalar double-precision conversion on opcode byte 5A with the F3 prefix.
	     The SSE2 form is two-operand (RM). The VEX and EVEX forms are three-operand: operand 2 comes from
	     VEX.vvvv / EVEX.vvvv and operand 3 from ModRM:r/m (RVM / T1S). The EVEX form adds {k1}{z} masking and {sae}. -->
	<common>
		<brief>CVTSS2SD--Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTSS2SD</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">F3 0F 5A /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value in xmm2/m32 to one double-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSS2SD</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.NDS.128.F3.0F.WIG 5A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value in xmm3/m32 to one double-precision floating-point value and merge with high bits of xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSS2SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 5A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value in xmm3/m32 to one double-precision floating-point value and merge with high bits of xmm2 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- CVTSS2SI / VCVTSS2SI: scalar single-precision to signed integer conversion into a general-purpose register.
	     Each encoding family (SSE, VEX/AVX, EVEX/AVX512F) has an r32 form and an r64 form on opcode byte 2D with the F3 prefix;
	     the r64 forms are selected by REX.W / W1 and are marked x32m NE or NE1 (see the Intel documentation for these keys).
	     The EVEX forms accept {er}. Both operand encodings used here (RM and T1F) write ModRM:reg and read ModRM:r/m. -->
	<common>
		<brief>CVTSS2SI--Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTSS2SI</mnem>
			<args>r32,xmm1/m32</args>
			<opc openc="RM">F3 0F 2D /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32.</dscrp>
		</ins>
		<ins x32m="NE" x64m="V">
			<mnem>CVTSS2SI</mnem>
			<args>r64,xmm1/m32</args>
			<opc openc="RM">F3 REX.W 0F 2D /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSS2SI</mnem>
			<args>r32,xmm1/m32</args>
			<opc openc="RM">VEX.128.F3.0F.W0 2D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTSS2SI</mnem>
			<args>r64,xmm1/m32</args>
			<opc openc="RM">VEX.128.F3.0F.W1 2D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSS2SI</mnem>
			<args>r32,xmm1/m32{er}</args>
			<opc openc="T1F">EVEX.LIG.F3.0F.W0 2D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTSS2SI</mnem>
			<args>r64,xmm1/m32{er}</args>
			<opc openc="T1F">EVEX.LIG.F3.0F.W1 2D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1F">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VCVTSS2USI: scalar single-precision to unsigned integer conversion into a general-purpose register.
	     EVEX-only entry (AVX512F) on opcode byte 79 with the F3 prefix: W0 targets r32, W1 targets r64
	     (the r64 form is marked x32m NE1; see the Intel documentation for this key). Both forms accept {er}.
	     T1F operand encoding: ModRM:reg written, ModRM:r/m read. -->
	<common>
		<brief>VCVTSS2USI--Convert Scalar Single-Precision Floating-Point Value to Unsigned Doubleword Integer.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTSS2USI</mnem>
			<args>r32,xmm1/m32{er}</args>
			<opc openc="T1F">EVEX.LIG.F3.0F.W0 79 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one unsigned doubleword integer in r32.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTSS2USI</mnem>
			<args>r64,xmm1/m32{er}</args>
			<opc openc="T1F">EVEX.LIG.F3.0F.W1 79 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one unsigned quadword integer in r64.</dscrp>
		</ins>
		<oprndenc openc="T1F">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- CVTTPD2DQ / VCVTTPD2DQ: truncating packed double-precision to packed signed doubleword conversion.
	     One SSE2 form, two VEX (AVX) forms and three EVEX (AVX512F) forms, all on opcode byte E6 with the 66 prefix.
	     The destination register is narrower than the source for the 256- and 512-bit forms
	     (xmm1 from ymm2, ymm1 from zmm2). The 128- and 256-bit EVEX forms additionally list AVX512VL;
	     the 512-bit form accepts {sae}. Both operand encodings used here (RM and FV) write ModRM:reg and read ModRM:r/m. -->
	<common>
		<brief>CVTTPD2DQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTTPD2DQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F E6 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1 using truncation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2DQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.66.0F.WIG E6 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/mem to two signed doubleword integers in xmm1 using truncation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2DQ</mnem>
			<args>xmm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.66.0F.WIG E6 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values in ymm2/mem to four signed doubleword integers in xmm1 using truncation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2DQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F.W1 E6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two signed doubleword integers in xmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2DQ</mnem>
			<args>xmm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F.W1 E6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four signed doubleword integers in xmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2DQ</mnem>
			<args>ymm1 {k1}{z},zmm2/m512/m64bcst{sae}</args>
			<opc openc="FV">EVEX.512.66.0F.W1 E6 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight signed doubleword integers in ymm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VCVTTPD2QQ: truncating packed double-precision to packed quadword integer conversion.
	     EVEX-only entry (AVX512DQ, plus AVX512VL for the 128/256-bit forms) on opcode byte 7A with the 66 prefix and W1.
	     Source and destination have the same vector width; the 512-bit form accepts {sae}.
	     FV operand encoding: ModRM:reg written, ModRM:r/m read. -->
	<common>
		<brief>VCVTTPD2QQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Quadword Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2QQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F.W1 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values from xmm2/m128/m64bcst to two packed quadword integers in xmm1 using truncation with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2QQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F.W1 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values from ymm2/m256/m64bcst to four packed quadword integers in ymm1 using truncation with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2QQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{sae}</args>
			<opc openc="FV">EVEX.512.66.0F.W1 7A /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed double-precision floating-point values from zmm2/m512 to eight packed quadword integers in zmm1 using truncation with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VCVTTPD2UDQ: truncating packed double-precision to packed unsigned doubleword conversion.
	     EVEX-only entry (AVX512F, plus AVX512VL for the 128/256-bit forms) on opcode byte 78 with W1 and no SIMD prefix.
	     All three vector lengths share the same single opcode byte followed by /r.
	     The destination register is narrower than the source for the 256- and 512-bit forms
	     (xmm1 from ymm2, ymm1 from zmm2); the 512-bit form accepts {sae}.
	     FV operand encoding: ModRM:reg written, ModRM:r/m read. -->
	<common>
		<brief>VCVTTPD2UDQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Doubleword Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2UDQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.0F.W1 78 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values in xmm2/m128/m64bcst to two unsigned doubleword integers in xmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2UDQ</mnem>
			<args>xmm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.0F.W1 78 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values in ymm2/m256/m64bcst to four unsigned doubleword integers in xmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2UDQ</mnem>
			<args>ymm1 {k1}{z},zmm2/m512/m64bcst{sae}</args>
			<opc openc="FV">EVEX.512.0F.W1 78 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed double-precision floating-point values in zmm2/m512/m64bcst to eight unsigned doubleword integers in ymm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VCVTTPD2UQQ: truncating packed double-precision to packed unsigned quadword integer conversion.
	     EVEX-only entry (AVX512DQ, plus AVX512VL for the 128/256-bit forms) on opcode byte 78 with the 66 prefix and W1.
	     Source and destination have the same vector width; the 512-bit form accepts {sae}.
	     FV operand encoding: ModRM:reg written, ModRM:r/m read. -->
	<common>
		<brief>VCVTTPD2UQQ--Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Unsigned Quadword Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2UQQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F.W1 78 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed double-precision floating-point values from xmm2/m128/m64bcst to two packed unsigned quadword integers in xmm1 using truncation with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2UQQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F.W1 78 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed double-precision floating-point values from ymm2/m256/m64bcst to four packed unsigned quadword integers in ymm1 using truncation with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPD2UQQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{sae}</args>
			<opc openc="FV">EVEX.512.66.0F.W1 78 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed double-precision floating-point values from zmm2/mem to eight packed unsigned quadword integers in zmm1 using truncation with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- CVTTPS2DQ / VCVTTPS2DQ: one legacy SSE2 form, two VEX forms (AVX) and three EVEX forms
		     (AVX512F; the 128- and 256-bit EVEX forms also list AVX512VL). All share opcode 5B with the F3 prefix.
		     Legacy and VEX forms use the RM encoding, EVEX forms use FV; both encodings place
		     operand 1 in ModRM:reg (written) and operand 2 in ModRM:r/m (read).
		     The {sae} suffix is written directly after the operand, as in the other blocks of this file. -->
		<brief>CVTTPS2DQ--Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Signed Doubleword Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTTPS2DQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">F3 0F 5B /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1 using truncation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2DQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.F3.0F.WIG 5B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert four packed single-precision floating-point values from xmm2/mem to four packed signed doubleword values in xmm1 using truncation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2DQ</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.F3.0F.WIG 5B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert eight packed single-precision floating-point values from ymm2/mem to eight packed signed doubleword values in ymm1 using truncation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2DQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.F3.0F.W0 5B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed doubleword values in xmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2DQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.F3.0F.W0 5B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed doubleword values in ymm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2DQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{sae}</args>
			<opc openc="FV">EVEX.512.F3.0F.W0 5B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert sixteen packed single-precision floating-point values from zmm2/m512/m32bcst to sixteen packed signed doubleword values in zmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTTPS2UDQ: three EVEX-encoded forms (128/256/512-bit), no SIMD prefix, W0, opcode 78.
		     The 128- and 256-bit forms list AVX512VL together with AVX512F; the 512-bit form lists AVX512F only
		     and is the only one whose source operand carries {sae}.
		     Every form uses the FV operand encoding: operand 1 in ModRM:reg (written), operand 2 in ModRM:r/m (read). -->
		<brief>VCVTTPS2UDQ--Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Unsigned Doubleword Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2UDQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.0F.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned doubleword values in xmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2UDQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.0F.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned doubleword values in ymm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2UDQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{sae}</args>
			<opc openc="FV">EVEX.512.0F.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert sixteen packed single-precision floating-point values from zmm2/m512/m32bcst to sixteen packed unsigned doubleword values in zmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTTPS2QQ: three EVEX-encoded forms, 66 prefix, W0, opcode 7A.
		     The source operand is half the width of the destination (xmm2/m64, xmm2/m128, ymm2/m256),
		     which matches the HV operand encoding used by every form.
		     The 128- and 256-bit forms list AVX512VL together with AVX512DQ; the 512-bit form lists AVX512DQ only
		     and is the only one whose source operand carries {sae}. -->
		<brief>VCVTTPS2QQ--Convert with Truncation Packed Single Precision Floating-Point Values to Packed Signed Quadword Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2QQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m64/m32bcst</args>
			<opc openc="HV">EVEX.128.66.0F.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed single precision floating-point values from xmm2/m64/m32bcst to two packed signed quadword values in xmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2QQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="HV">EVEX.256.66.0F.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed signed quadword values in ymm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2QQ</mnem>
			<args>zmm1 {k1}{z},ymm2/m256/m32bcst{sae}</args>
			<opc openc="HV">EVEX.512.66.0F.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed signed quadword values in zmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="HV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTTPS2UQQ: three EVEX-encoded forms, 66 prefix, W0, opcode 78.
		     The source operand is half the width of the destination (xmm2/m64, xmm2/m128, ymm2/m256),
		     which matches the HV operand encoding used by every form.
		     The 128- and 256-bit forms list AVX512VL together with AVX512DQ; the 512-bit form lists AVX512DQ only
		     and is the only one whose source operand carries {sae}. -->
		<brief>VCVTTPS2UQQ--Convert with Truncation Packed Single Precision Floating-Point Values to Packed Unsigned Quadword Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2UQQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m64/m32bcst</args>
			<opc openc="HV">EVEX.128.66.0F.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed single precision floating-point values from xmm2/m64/m32bcst to two packed unsigned quadword values in xmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2UQQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="HV">EVEX.256.66.0F.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed single precision floating-point values from xmm2/m128/m32bcst to four packed unsigned quadword values in ymm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTPS2UQQ</mnem>
			<args>zmm1 {k1}{z},ymm2/m256/m32bcst{sae}</args>
			<opc openc="HV">EVEX.512.66.0F.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed single precision floating-point values from ymm2/m256/m32bcst to eight packed unsigned quadword values in zmm1 using truncation subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="HV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- CVTTSD2SI / VCVTTSD2SI: two legacy SSE2 forms, two VEX forms (AVX) and two EVEX forms (AVX512F),
		     all with the F2 prefix and opcode 2C. Each pair has a r32 destination (W0, or no REX.W)
		     and a r64 destination (W1 or REX.W); the r64 forms are not marked "V" for 32-bit mode.
		     Legacy and VEX forms use the RM encoding; only the EVEX forms use the T1F tuple encoding
		     and carry {sae}. Both encodings place operand 1 in ModRM:reg (written) and operand 2 in ModRM:r/m (read). -->
		<brief>CVTTSD2SI--Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Integer.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTTSD2SI</mnem>
			<args>r32,xmm1/m64</args>
			<opc openc="RM">F2 0F 2C /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer in r32 using truncation.</dscrp>
		</ins>
		<ins x32m="NE" x64m="V">
			<mnem>CVTTSD2SI</mnem>
			<args>r64,xmm1/m64</args>
			<opc openc="RM">F2 REX.W 0F 2C /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer in r64 using truncation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTSD2SI</mnem>
			<args>r32,xmm1/m64</args>
			<opc openc="RM">VEX.128.F2.0F.W0 2C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer in r32 using truncation.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTTSD2SI</mnem>
			<args>r64,xmm1/m64</args>
			<opc openc="RM">VEX.128.F2.0F.W1 2C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer in r64 using truncation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTSD2SI</mnem>
			<args>r32,xmm1/m64{sae}</args>
			<opc openc="T1F">EVEX.LIG.F2.0F.W0 2C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed doubleword integer in r32 using truncation.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTTSD2SI</mnem>
			<args>r64,xmm1/m64{sae}</args>
			<opc openc="T1F">EVEX.LIG.F2.0F.W1 2C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one signed quadword integer in r64 using truncation.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1F">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTTSD2USI: two EVEX-encoded scalar forms (AVX512F), F2 prefix, opcode 78.
		     W0 selects the r32 destination and W1 the r64 destination; the r64 form is not marked "V" for 32-bit mode.
		     Both forms carry {sae} and use the T1F operand encoding:
		     operand 1 in ModRM:reg (written), operand 2 in ModRM:r/m (read). -->
		<brief>VCVTTSD2USI--Convert with Truncation Scalar Double-Precision Floating-Point Value to Unsigned Integer.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTSD2USI</mnem>
			<args>r32,xmm1/m64{sae}</args>
			<opc openc="T1F">EVEX.LIG.F2.0F.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one unsigned doubleword integer in r32 using truncation.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTTSD2USI</mnem>
			<args>r64,xmm1/m64{sae}</args>
			<opc openc="T1F">EVEX.LIG.F2.0F.W1 78 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one double-precision floating-point value from xmm1/m64 to one unsigned quadword integer zero-extended into r64 using truncation.</dscrp>
		</ins>
		<oprndenc openc="T1F">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- CVTTSS2SI / VCVTTSS2SI: two legacy SSE forms, two VEX forms (AVX) and two EVEX forms (AVX512F),
		     all with the F3 prefix and opcode 2C. Each pair has a r32 destination (W0, or no REX.W)
		     and a r64 destination (W1 or REX.W); the r64 forms are not marked "V" for 32-bit mode.
		     Legacy and VEX forms use the RM encoding; the EVEX forms use T1F and carry {sae}.
		     Both encodings place operand 1 in ModRM:reg (written) and operand 2 in ModRM:r/m (read). -->
		<brief>CVTTSS2SI--Convert with Truncation Scalar Single-Precision Floating-Point Value to Integer.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CVTTSS2SI</mnem>
			<args>r32,xmm1/m32</args>
			<opc openc="RM">F3 0F 2C /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32 using truncation.</dscrp>
		</ins>
		<ins x32m="NE" x64m="V">
			<mnem>CVTTSS2SI</mnem>
			<args>r64,xmm1/m32</args>
			<opc openc="RM">F3 REX.W 0F 2C /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64 using truncation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTSS2SI</mnem>
			<args>r32,xmm1/m32</args>
			<opc openc="RM">VEX.128.F3.0F.W0 2C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32 using truncation.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTTSS2SI</mnem>
			<args>r64,xmm1/m32</args>
			<opc openc="RM">VEX.128.F3.0F.W1 2C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64 using truncation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTSS2SI</mnem>
			<args>r32,xmm1/m32{sae}</args>
			<opc openc="T1F">EVEX.LIG.F3.0F.W0 2C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed doubleword integer in r32 using truncation.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTTSS2SI</mnem>
			<args>r64,xmm1/m32{sae}</args>
			<opc openc="T1F">EVEX.LIG.F3.0F.W1 2C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one signed quadword integer in r64 using truncation.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1F">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTTSS2USI: two EVEX-encoded scalar forms (AVX512F), F3 prefix, opcode 78.
		     W0 selects the r32 destination and W1 the r64 destination; the r64 form is not marked "V" for 32-bit mode.
		     Both forms carry {sae} and use the T1F operand encoding:
		     operand 1 in ModRM:reg (written), operand 2 in ModRM:r/m (read). -->
		<brief>VCVTTSS2USI--Convert with Truncation Scalar Single-Precision Floating-Point Value to Unsigned Integer.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTTSS2USI</mnem>
			<args>r32,xmm1/m32{sae}</args>
			<opc openc="T1F">EVEX.LIG.F3.0F.W0 78 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one unsigned doubleword integer in r32 using truncation.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTTSS2USI</mnem>
			<args>r64,xmm1/m32{sae}</args>
			<opc openc="T1F">EVEX.LIG.F3.0F.W1 78 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one single-precision floating-point value from xmm1/m32 to one unsigned quadword integer in r64 using truncation.</dscrp>
		</ins>
		<oprndenc openc="T1F">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTUDQ2PD: three EVEX-encoded forms, F3 prefix, W0, opcode 7A.
		     The source operand is half the width of the destination (xmm2/m64, xmm2/m128, ymm2/m256),
		     which matches the HV operand encoding used by every form.
		     The 128- and 256-bit forms list AVX512VL together with AVX512F; the 512-bit form lists AVX512F only.
		     None of the forms carries an {er} or {sae} suffix.
		     Register names in each description match that form's args element. -->
		<brief>VCVTUDQ2PD--Convert Packed Unsigned Doubleword Integers to Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUDQ2PD</mnem>
			<args>xmm1 {k1}{z},xmm2/m64/m32bcst</args>
			<opc openc="HV">EVEX.128.F3.0F.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert two packed unsigned doubleword integers from xmm2/m64/m32bcst to packed double-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUDQ2PD</mnem>
			<args>ymm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="HV">EVEX.256.F3.0F.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed double-precision floating-point values in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUDQ2PD</mnem>
			<args>zmm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="HV">EVEX.512.F3.0F.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed unsigned doubleword integers from ymm2/m256/m32bcst to eight packed double-precision floating-point values in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="HV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTUDQ2PS: three EVEX-encoded forms (128/256/512-bit), F2 prefix, W0, opcode 7A.
		     The 128- and 256-bit forms list AVX512VL together with AVX512F; the 512-bit form lists AVX512F only
		     and is the only one whose source operand carries {er}.
		     Every form uses the FV operand encoding: operand 1 in ModRM:reg (written), operand 2 in ModRM:r/m (read).
		     Register names in each description match that form's args element. -->
		<brief>VCVTUDQ2PS--Convert Packed Unsigned Doubleword Integers to Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUDQ2PS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.F2.0F.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed single-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUDQ2PS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.F2.0F.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert eight packed unsigned doubleword integers from ymm2/m256/m32bcst to packed single-precision floating-point values in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUDQ2PS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.512.F2.0F.W0 7A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert sixteen packed unsigned doubleword integers from zmm2/m512/m32bcst to sixteen packed single-precision floating-point values in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTUQQ2PD: three EVEX-encoded forms (128/256/512-bit), F3 prefix, W1, opcode 7A.
		     The 128- and 256-bit forms list AVX512VL together with AVX512DQ; the 512-bit form lists AVX512DQ only
		     and is the only one whose source operand carries {er}.
		     Every form uses the FV operand encoding: operand 1 in ModRM:reg (written), operand 2 in ModRM:r/m (read). -->
		<brief>VCVTUQQ2PD--Convert Packed Unsigned Quadword Integers to Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUQQ2PD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.F3.0F.W1 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed unsigned quadword integers from xmm2/m128/m64bcst to two packed double-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUQQ2PD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.F3.0F.W1 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed unsigned quadword integers from ymm2/m256/m64bcst to packed double-precision floating-point values in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUQQ2PD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.512.F3.0F.W1 7A /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed unsigned quadword integers from zmm2/m512/m64bcst to eight packed double-precision floating-point values in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTUQQ2PS: three EVEX-encoded forms, F2 prefix, W1, opcode 7A.
		     The destination register is narrower than or equal to the source: xmm1 for the 128- and 256-bit forms,
		     ymm1 for the 512-bit form (see each args element).
		     The 128- and 256-bit forms list AVX512VL together with AVX512DQ; the 512-bit form lists AVX512DQ only
		     and is the only one whose source operand carries {er}.
		     Every form uses the FV operand encoding: operand 1 in ModRM:reg (written), operand 2 in ModRM:r/m (read). -->
		<brief>VCVTUQQ2PS--Convert Packed Unsigned Quadword Integers to Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUQQ2PS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.F2.0F.W1 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert two packed unsigned quadword integers from xmm2/m128/m64bcst to packed single-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUQQ2PS</mnem>
			<args>xmm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.F2.0F.W1 7A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert four packed unsigned quadword integers from ymm2/m256/m64bcst to packed single-precision floating-point values in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUQQ2PS</mnem>
			<args>ymm1 {k1}{z},zmm2/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.512.F2.0F.W1 7A /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Convert eight packed unsigned quadword integers from zmm2/m512/m64bcst to eight packed single-precision floating-point values in ymm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTUSI2SD: two EVEX-encoded scalar forms (AVX512F), F2 prefix, opcode 7B.
		     W0 takes a r/m32 source and W1 a r/m64 source; the r/m64 form is not marked "V" for 32-bit mode.
		     Only the r/m64 form carries {er} in its args.
		     Both forms use the T1S operand encoding with three operands:
		     operand 1 in ModRM:reg (written), operand 2 in EVEX.vvvv, operand 3 in ModRM:r/m (read). -->
		<brief>VCVTUSI2SD--Convert Unsigned Integer to Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUSI2SD</mnem>
			<args>xmm1,xmm2,r/m32</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W0 7B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one unsigned doubleword integer from r/m32 to one double-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTUSI2SD</mnem>
			<args>xmm1,xmm2,r/m64{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 7B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one unsigned quadword integer from r/m64 to one double-precision floating-point value in xmm1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VCVTUSI2SS: two EVEX-encoded scalar forms (AVX512F), F3 prefix, opcode 7B.
		     W0 takes a r/m32 source and W1 a r/m64 source; the r/m64 form is not marked "V" for 32-bit mode.
		     Both forms carry {er} and convert an unsigned integer, as stated in the brief.
		     Both forms use the T1S operand encoding with three operands:
		     operand 1 in ModRM:reg (written), operand 2 in EVEX.vvvv (both forms are EVEX-encoded),
		     operand 3 in ModRM:r/m (read). -->
		<brief>VCVTUSI2SS--Convert Unsigned Integer to Scalar Single-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VCVTUSI2SS</mnem>
			<args>xmm1,xmm2,r/m32{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 7B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one unsigned doubleword integer from r/m32 to one single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VCVTUSI2SS</mnem>
			<args>xmm1,xmm2,r/m64{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W1 7B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert one unsigned quadword integer from r/m64 to one single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VDBPSADBW: three EVEX-encoded forms (128/256/512-bit), 66 prefix, 0F3A map, W0, opcode 42, followed by an imm8 byte.
		     The 128- and 256-bit forms list AVX512VL together with AVX512BW; the 512-bit form lists AVX512BW only.
		     The memory source has no broadcast variant in any form.
		     Every form uses the FVM operand encoding with four operands:
		     operand 1 in ModRM:reg (written), operand 2 in EVEX.vvvv, operand 3 in ModRM:r/m (read), operand 4 as Imm8. -->
		<brief>VDBPSADBW--Double Block Packed Sum-Absolute-Differences (SAD) on Unsigned Bytes.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VDBPSADBW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128,imm8</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F3A.W0 42 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compute packed SAD word results of unsigned bytes in dword block from xmm2 with unsigned bytes of dword blocks transformed from xmm3/m128 using the shuffle controls in imm8. Results are written to xmm1 under the writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDBPSADBW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256,imm8</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F3A.W0 42 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compute packed SAD word results of unsigned bytes in dword block from ymm2 with unsigned bytes of dword blocks transformed from ymm3/m256 using the shuffle controls in imm8. Results are written to ymm1 under the writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VDBPSADBW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512,imm8</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F3A.W0 42 /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compute packed SAD word results of unsigned bytes in dword block from zmm2 with unsigned bytes of dword blocks transformed from zmm3/m512 using the shuffle controls in imm8. Results are written to zmm1 under the writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VEXPANDPD: three EVEX-encoded forms (128/256/512-bit), 66 prefix, 0F38 map, W1, opcode 88.
		     The 128- and 256-bit forms list AVX512VL together with AVX512F; the 512-bit form lists AVX512F only.
		     The memory source has no broadcast variant in any form.
		     Every form uses the T1S operand encoding: operand 1 in ModRM:reg (written), operand 2 in ModRM:r/m (read). -->
		<brief>VEXPANDPD--Load Sparse Packed Double-Precision Floating-Point Values from Dense Memory.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VEXPANDPD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 88 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed double-precision floating-point values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXPANDPD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 88 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed double-precision floating-point values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXPANDPD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 88 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed double-precision floating-point values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VEXPANDPS--Load Sparse Packed Single-Precision Floating-Point Values from Dense Memory.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VEXPANDPS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 88 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed single-precision floating-point values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXPANDPS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 88 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed single-precision floating-point values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXPANDPS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 88 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed single-precision floating-point values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VEXTRACTF128 and its EVEX variants: one VEX form (AVX, RMI encoding) and six EVEX forms, all in the 0F3A map
		     with the 66 prefix and a trailing imm8 byte. Opcode 19 extracts 128 bits to xmm1/m128; opcode 1B extracts
		     256 bits to ymm1/m256. W0 selects the 32-bit-element variants and W1 the 64-bit-element variants.
		     The EVEX forms use tuple encodings T4 (F32X4, F64X4), T2 (F64X2) and T8 (F32X8).
		     In every encoding the destination is ModRM:r/m (written), the source is ModRM:reg (read), and operand 3 is Imm8.
		     Mnemonics are spelled with an upper-case X so that each instruction has a single spelling in this block. -->
		<brief>VEXTRACTF128/VEXTRACTF32x4/VEXTRACTF64x2/VEXTRACTF32x8/VEXTRACTF64x4--Extract Packed Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTF128</mnem>
			<args>xmm1/m128,ymm2,imm8</args>
			<opc openc="RMI">VEX.256.66.0F3A.W0 19 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Extract 128 bits of packed floating-point values from ymm2 and store results in xmm1/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTF32X4</mnem>
			<args>xmm1/m128 {k1}{z},ymm2,imm8</args>
			<opc openc="T4">EVEX.256.66.0F3A.W0 19 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Extract 128 bits of packed single-precision floating-point values from ymm2 and store results in xmm1/m128 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTF32X4</mnem>
			<args>xmm1/m128 {k1}{z},zmm2,imm8</args>
			<opc openc="T4">EVEX.512.66.0F3A.W0 19 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Extract 128 bits of packed single-precision floating-point values from zmm2 and store results in xmm1/m128 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTF64X2</mnem>
			<args>xmm1/m128 {k1}{z},ymm2,imm8</args>
			<opc openc="T2">EVEX.256.66.0F3A.W1 19 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Extract 128 bits of packed double-precision floating-point values from ymm2 and store results in xmm1/m128 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTF64X2</mnem>
			<args>xmm1/m128 {k1}{z},zmm2,imm8</args>
			<opc openc="T2">EVEX.512.66.0F3A.W1 19 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Extract 128 bits of packed double-precision floating-point values from zmm2 and store results in xmm1/m128 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTF32X8</mnem>
			<args>ymm1/m256 {k1}{z},zmm2,imm8</args>
			<opc openc="T8">EVEX.512.66.0F3A.W0 1B /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Extract 256 bits of packed single-precision floating-point values from zmm2 and store results in ymm1/m256 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTF64X4</mnem>
			<args>ymm1/m256 {k1}{z},zmm2,imm8</args>
			<opc openc="T4">EVEX.512.66.0F3A.W1 1B /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Extract 256 bits of packed double-precision floating-point values from zmm2 and store results in ymm1/m256 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RMI">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T2">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T4">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T8">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VEXTRACTI128/VEXTRACTI32x4/VEXTRACTI64x2/VEXTRACTI32x8/VEXTRACTI64x4--Extract packed Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTI128</mnem>
			<args>xmm1/m128,ymm2,imm8</args>
			<opc openc="RMI">VEX.256.66.0F3A.W0 39 /r ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Extract 128 bits of integer data from ymm2 and store results in xmm1/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTI32X4</mnem>
			<args>xmm1/m128 {k1}{z},ymm2,imm8</args>
			<opc openc="T4">EVEX.256.66.0F3A.W0 39 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Extract 128 bits of double-word integer values from ymm2 and store results in xmm1/m128 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTI32x4</mnem>
			<args>xmm1/m128 {k1}{z},zmm2,imm8</args>
			<opc openc="T4">EVEX.512.66.0F3A.W0 39 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Extract 128 bits of double-word integer values from zmm2 and store results in xmm1/m128 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTI64X2</mnem>
			<args>xmm1/m128 {k1}{z},ymm2,imm8</args>
			<opc openc="T2">EVEX.256.66.0F3A.W1 39 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Extract 128 bits of quad-word integer values from ymm2 and store results in xmm1/m128 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTI64X2</mnem>
			<args>xmm1/m128 {k1}{z},zmm2,imm8</args>
			<opc openc="T2">EVEX.512.66.0F3A.W1 39 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Extract 128 bits of quad-word integer values from zmm2 and store results in xmm1/m128 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTI32X8</mnem>
			<args>ymm1/m256 {k1}{z},zmm2,imm8</args>
			<opc openc="T8">EVEX.512.66.0F3A.W0 3B /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Extract 256 bits of double-word integer values from zmm2 and store results in ymm1/m256 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<!-- EVEX.512, W1 form with opcode 3B: extracts 256 bits of quad-word data from zmm2 into ymm1/m256; lists AVX512F. Mnemonic uses an uppercase X like the other VEXTRACTI entries in this group. -->
			<mnem>VEXTRACTI64X4</mnem>
			<args>ymm1/m256 {k1}{z},zmm2,imm8</args>
			<opc openc="T4">EVEX.512.66.0F3A.W1 3B /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Extract 256 bits of quad-word integer values from zmm2 and store results in ymm1/m256 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RMI">
			<!-- Operand roles for entries tagged openc="RMI": operand 1 is ModRM:r/m (written), operand 2 is ModRM:reg (read), operand 3 is the imm8; no fourth operand. -->
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T2">
			<!-- Operand roles for entries tagged openc="T2"; the operand layout is the same as the RMI, T4 and T8 tables of this group. -->
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T4">
			<!-- Operand roles for entries tagged openc="T4": destination in ModRM:r/m (written), source in ModRM:reg (read), then the imm8. -->
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T8">
			<!-- Operand roles for entries tagged openc="T8": destination in ModRM:r/m (written), source in ModRM:reg (read), then the imm8. -->
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  EXTRACTPS group: one legacy SSE4_1 encoding, one VEX.128 (AVX) encoding and
		  one EVEX.128 (AVX512F) encoding. All three take the same argument list
		  (reg/m32, xmm1, imm8) and share the same description text.
		  The legacy and VEX entries refer to the RMI operand table, the EVEX entry
		  to the T1S table; both tables list identical operand roles.
		 -->
		<brief>EXTRACTPS--Extract Packed Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>EXTRACTPS</mnem>
			<args>reg/m32,xmm1,imm8</args>
			<opc openc="RMI">66 0F 3A 17 /r ib</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Extract one single-precision floating-point value from xmm1 at the offset specified by imm8 and store the result in reg or m32. Zero extend the results in 64-bit register if applicable.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTPS</mnem>
			<args>reg/m32,xmm1,imm8</args>
			<opc openc="RMI">VEX.128.66.0F3A.WIG 17 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Extract one single-precision floating-point value from xmm1 at the offset specified by imm8 and store the result in reg or m32. Zero extend the results in 64-bit register if applicable.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VEXTRACTPS</mnem>
			<args>reg/m32,xmm1,imm8</args>
			<opc openc="T1S">EVEX.128.66.0F3A.WIG 17 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Extract one single-precision floating-point value from xmm1 at the offset specified by imm8 and store the result in reg or m32. Zero extend the results in 64-bit register if applicable.</dscrp>
		</ins>
		<!-- Operand tables: destination is ModRM:r/m (written), source is ModRM:reg (read), third operand is the imm8. -->
		<oprndenc openc="RMI">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  VFIXUPIMMPD group: EVEX-only, W1, opcode 54, at 128/256/512-bit vector length.
		  The 128- and 256-bit entries list AVX512VL together with AVX512F; the 512-bit
		  entry lists AVX512F only and is the only one whose argument list carries {sae}.
		  Every form takes a trailing imm8 (the "ib" in the opcode string), which is
		  operand 4 in the FV table below.
		 -->
		<brief>VFIXUPIMMPD--Fix Up Special Packed Float64 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFIXUPIMMPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F3A.W1 54 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Fix up special numbers in float64 vector xmm1, float64 vector xmm2 and int64 vector xmm3/m128/m64bcst and store the result in xmm1, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFIXUPIMMPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W1 54 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Fix up special numbers in float64 vector ymm1, float64 vector ymm2 and int64 vector ymm3/m256/m64bcst and store the result in ymm1, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFIXUPIMMPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae},imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W1 54 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Fix up elements of float64 vector in zmm2 using int64 vector table in zmm3/m512/m64bcst, combine with preserved elements from zmm1, and store the result in zmm1.</dscrp>
		</ins>
		<!-- Operand 1 is both read and written (r,w); operand 2 comes from EVEX.vvvv, operand 3 from ModRM:r/m. -->
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  VFIXUPIMMPS group: EVEX-only, W0, opcode 54, at 128/256/512-bit vector length.
		  The 128- and 256-bit entries list AVX512VL together with AVX512F; the 512-bit
		  entry lists AVX512F only and is the only one whose argument list carries {sae}.
		  Every form takes a trailing imm8 (operand 4 in the FV table below), so every
		  opcode string ends in "ib", including the 128- and 256-bit forms.
		 -->
		<brief>VFIXUPIMMPS--Fix Up Special Packed Float32 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFIXUPIMMPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F3A.W0 54 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Fix up special numbers in float32 vector xmm1, float32 vector xmm2 and int32 vector xmm3/m128/m32bcst and store the result in xmm1, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFIXUPIMMPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W0 54 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Fix up special numbers in float32 vector ymm1, float32 vector ymm2 and int32 vector ymm3/m256/m32bcst and store the result in ymm1, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFIXUPIMMPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae},imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W0 54 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Fix up elements of float32 vector in zmm2 using int32 vector table in zmm3/m512/m32bcst, combine with preserved elements from zmm1, and store the result in zmm1.</dscrp>
		</ins>
		<!-- Operand 1 is both read and written (r,w); operand 2 comes from EVEX.vvvv, operand 3 from ModRM:r/m. -->
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  VFIXUPIMMSD: single EVEX.LIG, W1 entry with opcode 55; lists AVX512F.
		  The argument list allows {sae} on the xmm3/m64 source and ends with an imm8,
		  which is operand 4 in the T1S table below.
		 -->
		<brief>VFIXUPIMMSD--Fix Up Special Scalar Float64 Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFIXUPIMMSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F3A.W1 55 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Fix up a float64 number in the low quadword element of xmm2 using scalar int32 table in xmm3/m64 and store the result in xmm1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  VFIXUPIMMSS: single EVEX.LIG, W0 entry with opcode 55; lists AVX512F.
		  The argument list allows {sae} on the xmm3/m32 source and ends with an imm8,
		  which is operand 4 in the T1S table below.
		 -->
		<brief>VFIXUPIMMSS--Fix Up Special Scalar Float32 Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFIXUPIMMSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F3A.W0 55 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Fix up a float32 number in the low doubleword element in xmm2 using scalar int32 table in xmm3/m32 and store the result in xmm1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Packed double-precision fused multiply-add, W1, opcodes 98 (132), A8 (213)
		  and B8 (231).
		  Layout: six VEX entries (128/256-bit, flag FMA, openc RVM) followed by nine
		  EVEX entries (128/256/512-bit, openc FV). The 128/256-bit EVEX entries list
		  AVX512VL together with AVX512F; the 512-bit entries list AVX512F only and
		  are the only ones whose argument list carries {er}.
		  Every EVEX entry is tagged FV so that it resolves to the EVEX.vvvv operand
		  table below rather than the VEX.vvvv (RVM) one.
		 -->
		<brief>VFMADD132PD/VFMADD213PD/VFMADD231PD--Fused Multiply-Add of Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 98 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 A8 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, add to xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 B8 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 98 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, add to ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 A8 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, add to ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 B8 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, add to ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 98 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 A8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, add to xmm3/m128/m64bcst and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 B8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 98 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, add to ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 A8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, add to ymm3/m256/m64bcst and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 B8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, add to ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 98 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, add to zmm2 and put result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 A8 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm2, add to zmm3/m512/m64bcst and put result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 B8 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, add to zmm1 and put result in zmm1.</dscrp>
		</ins>
		<!-- Operand tables: operand 1 (ModRM:reg) is read and written; operand 2 comes from VEX.vvvv or EVEX.vvvv; operand 3 is ModRM:r/m. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Packed single-precision fused multiply-add, W0, opcodes 98 (132), A8 (213)
		  and B8 (231).
		  Layout: six VEX entries (128/256-bit, flag FMA, openc RVM) followed by nine
		  EVEX entries (128/256/512-bit, openc FV). The 128/256-bit EVEX entries list
		  AVX512VL together with AVX512F; the 512-bit entries list AVX512F only and
		  are the only ones whose argument list carries {er}.
		 -->
		<brief>VFMADD132PS/VFMADD213PS/VFMADD231PS--Fused Multiply-Add of Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 98 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 A8 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, add to xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 B8 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 98 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, add to ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 A8 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, add to ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 B8 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, add to ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 98 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 A8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, add to xmm3/m128/m32bcst and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 B8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 98 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, add to ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 A8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, add to ymm3/m256/m32bcst and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 B8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, add to ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 98 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, add to zmm2 and put result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 A8 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm2, add to zmm3/m512/m32bcst and put result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 B8 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, add to zmm1 and put result in zmm1.</dscrp>
		</ins>
		<!-- Operand tables: operand 1 (ModRM:reg) is read and written; operand 2 comes from VEX.vvvv or EVEX.vvvv; operand 3 is ModRM:r/m. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Scalar double-precision fused multiply-add, LIG, W1, opcodes 99 (132),
		  A9 (213) and B9 (231).
		  Three VEX entries (flag FMA, openc RVM, source xmm3/m64) followed by three
		  EVEX entries (flag AVX512F, openc T1S) that add {k1}{z} masking on the
		  destination and {er} on the xmm3/m64 source.
		 -->
		<brief>VFMADD132SD/VFMADD213SD/VFMADD231SD--Fused Multiply-Add of Scalar Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 99 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 A9 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm2, add to xmm3/m64 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 B9 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 99 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 A9 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm2, add to xmm3/m64 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 B9 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<!-- Operand tables: operand 1 (ModRM:reg) is read and written; operand 2 comes from VEX.vvvv or EVEX.vvvv; operand 3 is ModRM:r/m. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Scalar single-precision fused multiply-add, LIG, W0, opcodes 99 (132),
		  A9 (213) and B9 (231).
		  Three VEX entries (flag FMA, openc RVM, source xmm3/m32) followed by three
		  EVEX entries (flag AVX512F, openc T1S) that add {k1}{z} masking on the
		  destination and {er} on the xmm3/m32 source.
		 -->
		<brief>VFMADD132SS/VFMADD213SS/VFMADD231SS--Fused Multiply-Add of Scalar Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 99 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 A9 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm2, add to xmm3/m32 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 B9 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD132SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 99 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD213SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 A9 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm2, add to xmm3/m32 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADD231SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 B9 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<!-- Operand tables: operand 1 (ModRM:reg) is read and written; operand 2 comes from VEX.vvvv or EVEX.vvvv; operand 3 is ModRM:r/m. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  Packed double-precision fused multiply with alternating add/subtract, W1,
		  opcodes 96 (132), A6 (213) and B6 (231).
		  Layout: six VEX entries (128/256-bit, flag FMA, openc RVM) followed by nine
		  EVEX entries (128/256/512-bit, openc FV), listed in 213, 231, 132 order
		  within each vector length. The 128/256-bit EVEX entries list AVX512VL
		  together with AVX512F; the 512-bit entries list AVX512F only and are the
		  only ones whose argument list carries {er}.
		 -->
		<brief>VFMADDSUB132PD/VFMADDSUB213PD/VFMADDSUB231PD--Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB132PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W1 96 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, add/subtract elements in xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB213PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W1 A6 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB231PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W1 B6 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, add/subtract elements in xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB132PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W1 96 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, add/subtract elements in ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB213PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W1 A6 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB231PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W1 B6 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, add/subtract elements in ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB213PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 A6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/m128/m64bcst and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB231PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 B6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, add/subtract elements in xmm1 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB132PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 96 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, add/subtract elements in xmm2 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB213PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 A6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/m256/m64bcst and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB231PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 B6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, add/subtract elements in ymm1 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB132PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 96 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, add/subtract elements in ymm2 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB213PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 A6 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm2, add/subtract elements in zmm3/m512/m64bcst and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB231PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 B6 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, add/subtract elements in zmm1 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB132PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 96 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, add/subtract elements in zmm2 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<!-- Operand tables: operand 1 (ModRM:reg) is read and written; operand 2 comes from VEX.vvvv or EVEX.vvvv; operand 3 is ModRM:r/m. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VFMADDSUB132PS/VFMADDSUB213PS/VFMADDSUB231PS--Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<!-- VEX.128, W0 form (flag FMA), 132 operand order, opcode 96. -->
			<mnem>VFMADDSUB132PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W0 96 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, add/subtract elements in xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<!-- VEX.128, W0 form (flag FMA), 213 operand order, opcode A6. -->
			<mnem>VFMADDSUB213PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W0 A6 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<!-- VEX.128, W0 form (flag FMA), 231 operand order, opcode B6. -->
			<mnem>VFMADDSUB231PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W0 B6 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, add/subtract elements in xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<!-- VEX.256, W0 form (flag FMA), 132 operand order, opcode 96. -->
			<mnem>VFMADDSUB132PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W0 96 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, add/subtract elements in ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<!-- VEX.256, W0 form (flag FMA), 213 operand order, opcode A6. -->
			<mnem>VFMADDSUB213PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W0 A6 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<!-- VEX.256, W0 form (flag FMA), 231 operand order, opcode B6. -->
			<mnem>VFMADDSUB231PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W0 B6 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, add/subtract elements in ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB213PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W0 A6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, add/subtract elements in xmm3/m128/m32bcst and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB231PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W0 B6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, add/subtract elements in xmm1 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB132PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W0 96 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, add/subtract elements in xmm2 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB213PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W0 A6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, add/subtract elements in ymm3/m256/m32bcst and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB231PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W0 B6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, add/subtract elements in ymm1 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB132PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W0 96 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, add/subtract elements in ymm2 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB213PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W0 A6 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm2, add/subtract elements in zmm3/m512/m32bcst and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB231PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W0 B6 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, add/subtract elements in zmm1 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMADDSUB132PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W0 96 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, add/subtract elements in zmm2 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VFMSUBADD132PD/VFMSUBADD213PD/VFMSUBADD231PD--Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD132PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W1 97 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, subtract/add elements in xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD213PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W1 A7 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD231PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W1 B7 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, subtract/add elements in xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD132PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W1 97 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, subtract/add elements in ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD213PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W1 A7 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD231PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W1 B7 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, subtract/add elements in ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD132PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 97 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, subtract/add elements in xmm2 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD213PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 A7 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/m128/m64bcst and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD231PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 B7 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, subtract/add elements in xmm1 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD132PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 97 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, subtract/add elements in ymm2 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD213PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 A7 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/m256/m64bcst and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD231PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 B7 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, subtract/add elements in ymm1 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD132PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 97 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, subtract/add elements in zmm2 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD213PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 A7 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm2, subtract/add elements in zmm3/m512/m64bcst and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD231PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 B7 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, subtract/add elements in zmm1 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VFMSUBADD132PS/VFMSUBADD213PS/VFMSUBADD231PS--Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD132PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W0 97 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, subtract/add elements in xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD213PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W0 A7 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD231PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.DDS.128.66.0F38.W0 B7 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, subtract/add elements in xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD132PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W0 97 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, subtract/add elements in ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD213PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W0 A7 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD231PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.DDS.256.66.0F38.W0 B7 /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, subtract/add elements in ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD132PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W0 97 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, subtract/add elements in xmm2 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD213PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W0 A7 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract/add elements in xmm3/m128/m32bcst and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD231PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W0 B7 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, subtract/add elements in xmm1 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD132PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W0 97 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, subtract/add elements in ymm2 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD213PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W0 A7 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract/add elements in ymm3/m256/m32bcst and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD231PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W0 B7 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, subtract/add elements in ymm1 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD132PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W0 97 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, subtract/add elements in zmm2 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD213PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W0 A7 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm2, subtract/add elements in zmm3/m512/m32bcst and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUBADD231PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W0 B7 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, subtract/add elements in zmm1 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VFMSUB132PD/VFMSUB213PD/VFMSUB231PD--Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 9A /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 AA /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 BA /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 9A /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, subtract ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 AA /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 BA /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, subtract ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 9A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, subtract xmm2 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 AA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, subtract xmm3/m128/m64bcst and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 BA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, subtract xmm1 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 9A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, subtract ymm2 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 AA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, subtract ymm3/m256/m64bcst and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 BA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, subtract ymm1 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 9A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, subtract zmm2 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 AA /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm2, subtract zmm3/m512/m64bcst and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 BA /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, subtract zmm1 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VFMSUB132PS/VFMSUB213PS/VFMSUB231PS--Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 9A /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 AA /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 BA /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 9A /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, subtract ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 AA /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 BA /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, subtract ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 9A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, subtract xmm2 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 AA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, subtract xmm3/m128/m32bcst and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 BA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, subtract xmm1 and put result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 9A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, subtract ymm2 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 AA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, subtract ymm3/m256/m32bcst and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 BA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, subtract ymm1 and put result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 9A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, subtract zmm2 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 AA /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm2, subtract zmm3/m512/m32bcst and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 BA /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, subtract zmm1 and put result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VFMSUB132SD/VFMSUB213SD/VFMSUB231SD--Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 9B /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 AB /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm2, subtract xmm3/m64 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 BB /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 9B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 AB /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm2, subtract xmm3/m64 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 BB /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VFMSUB132SS / VFMSUB213SS / VFMSUB231SS: the 132, 213 and 231 operand orderings of the scalar single-precision fused multiply-subtract, listed first in VEX encoding and then in EVEX encoding. -->
	<common>
		<brief>VFMSUB132SS/VFMSUB213SS/VFMSUB231SS--Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values.</brief>
		<!-- VEX forms (LIG, W0; opcode bytes 9B, AB, BB), each gated on the FMA CPUID flag. -->
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 9B /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 AB /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm2, subtract xmm3/m32 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 BB /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<!-- EVEX forms reuse the same opcode bytes; their args add the {k1}{z} and {er} decorations and they are gated on the AVX512F CPUID flag. -->
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB132SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 9B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB213SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 AB /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm2, subtract xmm3/m32 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFMSUB231SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 BB /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, one per openc value used by the opc elements above (RVM for the VEX forms, T1S for the EVEX forms). -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VFNMADD132PD / VFNMADD213PD / VFNMADD231PD: the 132, 213 and 231 operand orderings of the packed double-precision fused negative multiply-add, listed first in VEX encoding and then in EVEX encoding. -->
	<common>
		<brief>VFNMADD132PD/VFNMADD213PD/VFNMADD231PD--Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values.</brief>
		<!-- VEX forms (W1; opcode bytes 9C, AC, BC) at 128-bit (xmm) and 256-bit (ymm) width, each gated on the FMA CPUID flag. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 9C /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 AC /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 BC /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 9C /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and add to ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 AC /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 BC /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and add to ymm1 and put result in ymm1.</dscrp>
		</ins>
		<!-- EVEX forms reuse the same opcode bytes. The 128-bit and 256-bit forms list both AVX512VL and AVX512F; the 512-bit (zmm) forms list AVX512F only and carry {er} on the last operand. -->
		<!-- Operands are numbered xmm1, xmm2, xmm3 in every form so that args and dscrp refer to the same registers. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 9C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, negate the multiplication result and add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 AC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/m128/m64bcst and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 BC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, negate the multiplication result and add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 9C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, negate the multiplication result and add to ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 AC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/m256/m64bcst and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 BC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, negate the multiplication result and add to ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 9C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, negate the multiplication result and add to zmm2 and put result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 AC /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm2, negate the multiplication result and add to zmm3/m512/m64bcst and put result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 BC /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, negate the multiplication result and add to zmm1 and put result in zmm1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, one per openc value used by the opc elements above (RVM for the VEX forms, FV for the EVEX forms). -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VFNMADD132PS / VFNMADD213PS / VFNMADD231PS: the 132, 213 and 231 operand orderings of the packed single-precision fused negative multiply-add, listed first in VEX encoding and then in EVEX encoding. -->
	<common>
		<brief>VFNMADD132PS/VFNMADD213PS/VFNMADD231PS--Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values.</brief>
		<!-- VEX forms (W0; opcode bytes 9C, AC, BC) at 128-bit (xmm) and 256-bit (ymm) width, each gated on the FMA CPUID flag. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 9C /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 AC /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 BC /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 9C /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and add to ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 AC /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 BC /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and add to ymm1 and put result in ymm1.</dscrp>
		</ins>
		<!-- EVEX forms reuse the same opcode bytes. The 128-bit and 256-bit forms list both AVX512VL and AVX512F. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 9C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, negate the multiplication result and add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 AC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and add to xmm3/m128/m32bcst and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 BC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, negate the multiplication result and add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 9C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, negate the multiplication result and add to ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 AC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and add to ymm3/m256/m32bcst and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 BC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, negate the multiplication result and add to ymm1 and put result in ymm1.</dscrp>
		</ins>
		<!-- 512-bit (zmm) EVEX forms carry {er} on the last operand and list AVX512F only; AVX512VL appears solely on the 128-bit and 256-bit forms above. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 9C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, negate the multiplication result and add to zmm2 and put result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 AC /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm2, negate the multiplication result and add to zmm3/m512/m32bcst and put result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 BC /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, negate the multiplication result and add to zmm1 and put result in zmm1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, one per openc value used by the opc elements above (RVM for the VEX forms, FV for the EVEX forms). -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VFNMADD132SD / VFNMADD213SD / VFNMADD231SD: the 132, 213 and 231 operand orderings of the scalar double-precision fused negative multiply-add, listed first in VEX encoding and then in EVEX encoding. -->
	<common>
		<brief>VFNMADD132SD/VFNMADD213SD/VFNMADD231SD--Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values.</brief>
		<!-- VEX forms (LIG, W1; opcode bytes 9D, AD, BD), each gated on the FMA CPUID flag. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 9D /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm3/mem, negate the multiplication result and add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 AD /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 BD /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm2 and xmm3/mem, negate the multiplication result and add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<!-- EVEX forms reuse the same opcode bytes; their args add the {k1}{z} and {er} decorations and they are gated on the AVX512F CPUID flag. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 9D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, negate the multiplication result and add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 AD /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/m64 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 BD /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, negate the multiplication result and add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, one per openc value used by the opc elements above (RVM for the VEX forms, T1S for the EVEX forms). -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VFNMADD132SS / VFNMADD213SS / VFNMADD231SS: the 132, 213 and 231 operand orderings of the scalar single-precision fused negative multiply-add, listed first in VEX encoding and then in EVEX encoding. -->
	<common>
		<brief>VFNMADD132SS/VFNMADD213SS/VFNMADD231SS--Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values.</brief>
		<!-- VEX forms (LIG, W0; opcode bytes 9D, AD, BD), each gated on the FMA CPUID flag. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 9D /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 AD /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/m32 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 BD /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<!-- EVEX forms reuse the same opcode bytes; their args add the {k1}{z} and {er} decorations and they are gated on the AVX512F CPUID flag. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD132SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 9D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and add to xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD213SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 AD /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and add to xmm3/m32 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMADD231SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 BD /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, one per openc value used by the opc elements above (RVM for the VEX forms, T1S for the EVEX forms). -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VFNMSUB132PD / VFNMSUB213PD / VFNMSUB231PD: the 132, 213 and 231 operand orderings of the packed double-precision fused negative multiply-subtract, listed first in VEX encoding and then in EVEX encoding. -->
	<common>
		<brief>VFNMSUB132PD/VFNMSUB213PD/VFNMSUB231PD--Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values.</brief>
		<!-- VEX forms (W1; opcode bytes 9E, AE, BE) at 128-bit (xmm) and 256-bit (ymm) width, each gated on the FMA CPUID flag. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 9E /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 AE /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231PD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 BE /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 9E /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and subtract ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 AE /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231PD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 BE /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and subtract ymm1 and put result in ymm1.</dscrp>
		</ins>
		<!-- EVEX forms reuse the same opcode bytes. The 128-bit and 256-bit forms list both AVX512VL and AVX512F; the 512-bit (zmm) forms list AVX512F only and carry {er} on the last operand. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 9E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm3/m128/m64bcst, negate the multiplication result and subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 AE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m128/m64bcst and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 BE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from xmm2 and xmm3/m128/m64bcst, negate the multiplication result and subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 9E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm3/m256/m64bcst, negate the multiplication result and subtract ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 AE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/m256/m64bcst and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 BE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from ymm2 and ymm3/m256/m64bcst, negate the multiplication result and subtract ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 9E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm3/m512/m64bcst, negate the multiplication result and subtract zmm2 and put result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 AE /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm1 and zmm2, negate the multiplication result and subtract zmm3/m512/m64bcst and put result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 BE /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values from zmm2 and zmm3/m512/m64bcst, negate the multiplication result and subtract zmm1 and put result in zmm1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, one per openc value used by the opc elements above (RVM for the VEX forms, FV for the EVEX forms). -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VFNMSUB132PS/VFNMSUB213PS/VFNMSUB231PS--Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 9E /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/mem, negate the multiplication result and subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 AE /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231PS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 BE /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/mem, negate the multiplication result and subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 9E /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/mem, negate the multiplication result and subtract ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 AE /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/mem and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231PS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 BE /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/mem, negate the multiplication result and subtract ymm1 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 9E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm3/m128/m32bcst, negate the multiplication result and subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 AE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m128/m32bcst and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 BE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from xmm2 and xmm3/m128/m32bcst, negate the multiplication result subtract add to xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 9E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm3/m256/m32bcst, negate the multiplication result and subtract ymm2 and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 AE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm1 and ymm2, negate the multiplication result and subtract ymm3/m256/m32bcst and put result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 BE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from ymm2 and ymm3/m256/m32bcst, negate the multiplication result subtract add to ymm1 and put result in ymm1.</dscrp>
		</ins>
		<!-- EVEX.512 encoding of VFNMSUB132PS (cpuid flag AVX512F only, opcode byte 9E). Unlike the 128/256-bit entries, the args carry the {er} suffix on the third operand. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 9E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm3/m512/m32bcst, negate the multiplication result and subtract zmm2 and put result in zmm1.</dscrp>
		</ins>
		<!-- EVEX.512 encoding of VFNMSUB213PS (cpuid flag AVX512F only, opcode byte AE). Unlike the 128/256-bit entries, the args carry the {er} suffix on the third operand. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 AE /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm1 and zmm2, negate the multiplication result and subtract zmm3/m512/m32bcst and put result in zmm1.</dscrp>
		</ins>
		<!-- EVEX.512 encoding of VFNMSUB231PS (cpuid flag AVX512F only, opcode byte BE, {er} on the third operand). The description previously read "negate the multiplication result subtract add to zmm1"; it now follows the wording of the 132/213 entries: multiply operands 2 and 3, negate the product, subtract operand 1. -->
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 BE /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values from zmm2 and zmm3/m512/m32bcst, negate the multiplication result and subtract zmm1 and put result in zmm1.</dscrp>
		</ins>
		<!-- Operand-encoding table for entries whose opc carries openc="RVM": operand 1 in ModRM:reg (r,w), operand 2 in VEX.vvvv (r), operand 3 in ModRM:r/m (r), no fourth operand. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- Operand-encoding table for entries whose opc carries openc="FV" (the EVEX entries above): same layout as the RVM table, with EVEX.vvvv instead of VEX.vvvv for operand 2. -->
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Scalar double-precision fused negative multiply-subtract. Three VEX entries (cpuid flag FMA, openc RVM) are followed by three EVEX entries (cpuid flag AVX512F, openc T1S) whose args add {k1}{z} masking and {er}. The 132/213/231 variants use opcode bytes 9F/AF/BF respectively; every entry uses W1. -->
	<common>
		<brief>VFNMSUB132SD/VFNMSUB213SD/VFNMSUB231SD--Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 9F /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm3/mem, negate the multiplication result and subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 AF /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/mem and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231SD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W1 BF /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm2 and xmm3/mem, negate the multiplication result and subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 9F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm3/m64, negate the multiplication result and subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 AF /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m64 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W1 BF /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar double-precision floating-point value from xmm2 and xmm3/m64, negate the multiplication result and subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<!-- Both tables share the same layout; they differ only in whether operand 2 comes from VEX.vvvv or EVEX.vvvv. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Scalar single-precision fused negative multiply-subtract. Three VEX entries (cpuid flag FMA, openc RVM) are followed by three EVEX entries (cpuid flag AVX512F, openc T1S) whose args add {k1}{z} masking and {er}. The 132/213/231 variants use opcode bytes 9F/AF/BF respectively; every entry uses W0. -->
	<common>
		<brief>VFNMSUB132SS/VFNMSUB213SS/VFNMSUB231SS--Fused Negative Multiply-Subtract of Scalar Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 9F /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 AF /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m32 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231SS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.DDS.LIG.66.0F38.W0 BF /r</opc>
			<cpuid>
				<flag>FMA</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB132SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 9F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm3/m32, negate the multiplication result and subtract xmm2 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB213SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 AF /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm1 and xmm2, negate the multiplication result and subtract xmm3/m32 and put result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFNMSUB231SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.DDS.LIG.66.0F38.W0 BF /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply scalar single-precision floating-point value from xmm2 and xmm3/m32, negate the multiplication result and subtract xmm1 and put result in xmm1.</dscrp>
		</ins>
		<!-- Both tables share the same layout; they differ only in whether operand 2 comes from VEX.vvvv or EVEX.vvvv. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Packed float64 classification into a mask register. Three EVEX entries (128/256/512; the 128- and 256-bit ones also list AVX512VL) share opcode 66 with W1. Every entry takes imm8 as its third argument and its opcode ends in ib, so the FV table records Imm8 as operand 3 (it previously read NA). -->
	<common>
		<brief>VFPCLASSPD--Tests Types Of a Packed Float64 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFPCLASSPD</mnem>
			<args>k2 {k1},xmm2/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.128.66.0F3A.W1 66 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFPCLASSPD</mnem>
			<args>k2 {k1},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.256.66.0F3A.W1 66 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFPCLASSPD</mnem>
			<args>k2 {k1},zmm2/m512/m64bcst,imm8</args>
			<opc openc="FV">EVEX.512.66.0F3A.W1 66 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Packed float32 classification into a mask register. Three EVEX entries (128/256/512; the 128- and 256-bit ones also list AVX512VL) share opcode 66 with W0. Every entry takes imm8 as its third argument and its opcode ends in ib, so the FV table records Imm8 as operand 3 (it previously read NA). -->
	<common>
		<brief>VFPCLASSPS--Tests Types Of a Packed Float32 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFPCLASSPS</mnem>
			<args>k2 {k1},xmm2/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.128.66.0F3A.W0 66 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFPCLASSPS</mnem>
			<args>k2 {k1},ymm2/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.256.66.0F3A.W0 66 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VFPCLASSPS</mnem>
			<args>k2 {k1},zmm2/m512/m32bcst,imm8</args>
			<opc openc="FV">EVEX.512.66.0F3A.W0 66 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Scalar float64 classification into a mask register: a single EVEX.LIG entry (cpuid flag AVX512DQ, opcode 67, W1). The entry takes imm8 as its third argument and its opcode ends in ib, so the T1S table records Imm8 as operand 3 (it previously read NA). -->
	<common>
		<brief>VFPCLASSSD--Tests Types Of a Scalar Float64 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFPCLASSSD</mnem>
			<args>k2 {k1},xmm2/m64,imm8</args>
			<opc openc="T1S">EVEX.LIG.66.0F3A.W1 67 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Scalar float32 classification into a mask register: a single EVEX.LIG entry (cpuid flag AVX512DQ, opcode 67, W0). The args include imm8, so the opcode string now ends in ib as the VFPCLASSSD entry does (it was missing here), and the T1S table records Imm8 as operand 3 (it previously read NA). -->
	<common>
		<brief>VFPCLASSSS--Tests Types Of a Scalar Float32 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VFPCLASSSS</mnem>
			<args>k2 {k1},xmm2/m32,imm8</args>
			<opc openc="T1S">EVEX.LIG.66.0F3A.W0 67 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Tests the input for the following categories: NaN, +0, -0, +Infinity, -Infinity, denormal, finite negative. The immediate field provides a mask bit for each of these category tests. The masked test results are OR-ed together to form a mask result.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- EVEX integer gathers with signed dword indices, all on opcode 90 with /vsib. VPGATHERDD (W0) pairs xmm1/vm32x, ymm1/vm32y and zmm1/vm32z; VPGATHERDQ (W1) pairs xmm1/vm32x, ymm1/vm32x and zmm1/vm32y. The 128- and 256-bit entries list AVX512VL in addition to AVX512F. Args carry {k1} only (no {z}). -->
	<common>
		<brief>VPGATHERDD/VPGATHERDQ--Gather Packed Dword, Packed Qword with Signed Dword Indices.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERDD</mnem>
			<args>xmm1 {k1},vm32x</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 90 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather dword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERDD</mnem>
			<args>ymm1 {k1},vm32y</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 90 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather dword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERDD</mnem>
			<args>zmm1 {k1},vm32z</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 90 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather dword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERDQ</mnem>
			<args>xmm1 {k1},vm32x</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 90 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather quadword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERDQ</mnem>
			<args>ymm1 {k1},vm32x</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 90 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather quadword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERDQ</mnem>
			<args>zmm1 {k1},vm32y</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 90 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather quadword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<!-- Operand 2 is the VSIB memory operand: base register from VSIB:base, vector index register from VSIB:index. -->
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>BaseReg(R): VSIB:base,VectorReg(R): VSIB:index</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- EVEX integer gathers with signed qword indices, all on opcode 91 with /vsib. VPGATHERQD (W0) pairs xmm1/vm64x, xmm1/vm64y and ymm1/vm64z, so the 256- and 512-bit entries name a destination narrower than the EVEX length; NOTE(review): presumably because qword indices yield half as many dword results, confirm against the Intel reference. VPGATHERQQ (W1) pairs xmm1/vm64x, ymm1/vm64y and zmm1/vm64z. The 128- and 256-bit entries list AVX512VL in addition to AVX512F. -->
	<common>
		<brief>VPGATHERQD/VPGATHERQQ--Gather Packed Dword, Packed Qword with Signed Qword Indices.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERQD</mnem>
			<args>xmm1 {k1},vm64x</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 91 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather dword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERQD</mnem>
			<args>xmm1 {k1},vm64y</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 91 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather dword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERQD</mnem>
			<args>ymm1 {k1},vm64z</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 91 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather dword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERQQ</mnem>
			<args>xmm1 {k1},vm64x</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 91 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather quadword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERQQ</mnem>
			<args>ymm1 {k1},vm64y</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 91 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather quadword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPGATHERQQ</mnem>
			<args>zmm1 {k1},vm64z</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 91 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather quadword values from memory using writemask k1 for merging-masking.</dscrp>
		</ins>
		<!-- Operand 2 is the VSIB memory operand: base register from VSIB:base, vector index register from VSIB:index. -->
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>BaseReg(R): VSIB:base,VectorReg(R): VSIB:index</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- EVEX floating-point gathers with signed dword indices, all on opcode 92 with /vsib. VGATHERDPS (W0) pairs xmm1/vm32x, ymm1/vm32y and zmm1/vm32z; VGATHERDPD (W1) pairs xmm1/vm32x, ymm1/vm32x and zmm1/vm32y. Text fixes: "floatingpoint" is now "floating-point" in the three VGATHERDPS descriptions, and the brief ends in "Indices" like the sibling qword-index gather brief. -->
	<common>
		<brief>VGATHERDPS/VGATHERDPD--Gather Packed Single, Packed Double with Signed Dword Indices.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERDPS</mnem>
			<args>xmm1 {k1},vm32x</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 92 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather single-precision floating-point values from memory using k1 as completion mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERDPS</mnem>
			<args>ymm1 {k1},vm32y</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 92 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather single-precision floating-point values from memory using k1 as completion mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERDPS</mnem>
			<args>zmm1 {k1},vm32z</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 92 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather single-precision floating-point values from memory using k1 as completion mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERDPD</mnem>
			<args>xmm1 {k1},vm32x</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 92 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather float64 vector into float64 vector xmm1 using k1 as completion mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERDPD</mnem>
			<args>ymm1 {k1},vm32x</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 92 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather float64 vector into float64 vector ymm1 using k1 as completion mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERDPD</mnem>
			<args>zmm1 {k1},vm32y</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 92 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, gather float64 vector into float64 vector zmm1 using k1 as completion mask.</dscrp>
		</ins>
		<!-- Operand 2 is the VSIB memory operand: base register from VSIB:base, vector index register from VSIB:index. -->
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>BaseReg(R): VSIB:base,VectorReg(R): VSIB:index</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- EVEX floating-point gathers with signed qword indices, all on opcode 93 with /vsib. VGATHERQPS (W0) pairs xmm1/vm64x, xmm1/vm64y and ymm1/vm64z, so the 256- and 512-bit entries name a destination narrower than the EVEX length; NOTE(review): presumably because qword indices yield half as many single-precision results, confirm against the Intel reference. VGATHERQPD (W1) pairs xmm1/vm64x, ymm1/vm64y and zmm1/vm64z. -->
	<common>
		<brief>VGATHERQPS/VGATHERQPD--Gather Packed Single, Packed Double with Signed Qword Indices.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERQPS</mnem>
			<args>xmm1 {k1},vm64x</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 93 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather single-precision floating-point values from memory using k1 as completion mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERQPS</mnem>
			<args>xmm1 {k1},vm64y</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 93 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather single-precision floating-point values from memory using k1 as completion mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERQPS</mnem>
			<args>ymm1 {k1},vm64z</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 93 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather single-precision floating-point values from memory using k1 as completion mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERQPD</mnem>
			<args>xmm1 {k1},vm64x</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 93 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather float64 vector into float64 vector xmm1 using k1 as completion mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERQPD</mnem>
			<args>ymm1 {k1},vm64y</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 93 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather float64 vector into float64 vector ymm1 using k1 as completion mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERQPD</mnem>
			<args>zmm1 {k1},vm64z</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 93 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, gather float64 vector into float64 vector zmm1 using k1 as completion mask.</dscrp>
		</ins>
		<!-- Operand 2 is the VSIB memory operand: base register from VSIB:base, vector index register from VSIB:index. -->
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>BaseReg(R): VSIB:base,VectorReg(R): VSIB:index</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Packed double-precision exponent extraction: three EVEX entries (128/256/512) on opcode 42 with W1. The 128- and 256-bit entries list AVX512VL in addition to AVX512F; only the 512-bit entry carries {sae} in its args. Two operands, destination in ModRM:reg and source in ModRM:r/m. -->
	<common>
		<brief>VGETEXPPD--Convert Exponents of Packed DP FP Values to DP FP Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGETEXPPD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W1 42 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert the exponent of packed double-precision floating-point values in the source operand to DP FP results representing unbiased integer exponents and stores the results in the destination register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGETEXPPD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W1 42 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert the exponent of packed double-precision floating-point values in the source operand to DP FP results representing unbiased integer exponents and stores the results in the destination register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGETEXPPD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{sae}</args>
			<opc openc="FV">EVEX.512.66.0F38.W1 42 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert the exponent of packed double-precision floating-point values in the source operand to DP FP results representing unbiased integer exponents and stores the results in the destination under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Packed single-precision exponent extraction: three EVEX entries (128/256/512) on opcode 42 with W0. The 128- and 256-bit entries list AVX512VL in addition to AVX512F; only the 512-bit entry carries {sae} in its args. Two operands, destination in ModRM:reg and source in ModRM:r/m. -->
	<common>
		<brief>VGETEXPPS--Convert Exponents of Packed SP FP Values to SP FP Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGETEXPPS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W0 42 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert the exponent of packed single-precision floating-point values in the source operand to SP FP results representing unbiased integer exponents and stores the results in the destination register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGETEXPPS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W0 42 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert the exponent of packed single-precision floating-point values in the source operand to SP FP results representing unbiased integer exponents and stores the results in the destination register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGETEXPPS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{sae}</args>
			<opc openc="FV">EVEX.512.66.0F38.W0 42 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert the exponent of packed single-precision floating-point values in the source operand to SP FP results representing unbiased integer exponents and stores the results in the destination register.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Scalar double-precision exponent extraction: a single EVEX.NDS.LIG entry (cpuid flag AVX512F, opcode 43, W1) with three operands and {sae} on the source. Text fix: "doubleprecision" in the description is now "double-precision". -->
	<common>
		<brief>VGETEXPSD--Convert Exponents of Scalar DP FP Values to DP FP Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGETEXPSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W1 43 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert the biased exponent (bits 62:52) of the low double-precision floating-point value in xmm3/m64 to a DP FP value representing unbiased integer exponent. Stores the result to the low 64-bit of xmm1 under the writemask k1 and merge with the other elements of xmm2.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Scalar single-precision exponent extraction: a single EVEX.NDS.LIG entry (cpuid flag AVX512F, opcode 43, W0) with three operands and {sae} on the source. Text fix: "singleprecision" in the description is now "single-precision". -->
	<common>
		<brief>VGETEXPSS--Convert Exponents of Scalar SP FP Values to SP FP Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGETEXPSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W0 43 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Convert the biased exponent (bits 30:23) of the low single-precision floating-point value in xmm3/m32 to a SP FP value representing unbiased integer exponent. Stores the result to xmm1 under the writemask k1 and merge with the other elements of xmm2.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Packed float64 normalized-mantissa extraction: three EVEX entries (128/256/512) on opcode 26 with W1 and a trailing imm8 (ib). All three entries reference openc="FV", but the only operand-encoding table in this block was keyed "FVI", leaving the reference dangling; the table is now keyed "FV" so the lookup resolves. Its contents (Imm8 as operand 3) are unchanged. -->
	<common>
		<brief>VGETMANTPD--Extract Float64 Vector of Normalized Mantissas from Float64 Vector.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGETMANTPD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.128.66.0F3A.W1 26 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Get Normalized Mantissa from float64 vector xmm2/m128/m64bcst and store the result in xmm1, using imm8 for sign control and mantissa interval normalization, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGETMANTPD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.256.66.0F3A.W1 26 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Get Normalized Mantissa from float64 vector ymm2/m256/m64bcst and store the result in ymm1, using imm8 for sign control and mantissa interval normalization, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGETMANTPD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{sae},imm8</args>
			<opc openc="FV">EVEX.512.66.0F3A.W1 26 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Get Normalized Mantissa from float64 vector zmm2/m512/m64bcst and store the result in zmm1, using imm8 for sign control and mantissa interval normalization, under writemask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Packed float32 normalized-mantissa extraction: three EVEX entries (128/256/512) on opcode 26 with W0 and a trailing imm8 (ib). All three entries reference openc="FV", but the only operand-encoding table in this block was keyed "FVI", leaving the reference dangling; the table is now keyed "FV" so the lookup resolves. Its contents (Imm8 as operand 3) are unchanged. -->
	<common>
		<brief>VGETMANTPS--Extract Float32 Vector of Normalized Mantissas from Float32 Vector.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGETMANTPS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.128.66.0F3A.W0 26 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Get normalized mantissa from float32 vector xmm2/m128/m32bcst and store the result in xmm1, using imm8 for sign control and mantissa interval normalization, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGETMANTPS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.256.66.0F3A.W0 26 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Get normalized mantissa from float32 vector ymm2/m256/m32bcst and store the result in ymm1, using imm8 for sign control and mantissa interval normalization, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGETMANTPS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{sae},imm8</args>
			<opc openc="FV">EVEX.512.66.0F3A.W0 26 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Get normalized mantissa from float32 vector zmm2/m512/m32bcst and store the result in zmm1, using imm8 for sign control and mantissa interval normalization, under writemask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Scalar float64 normalized-mantissa extraction: a single EVEX.NDS.LIG entry (cpuid flag AVX512F, opcode 27, W1). The entry takes imm8 as its fourth argument and its opcode ends in ib, so the T1S table records Imm8 as operand 4 (it previously read NA). -->
	<common>
		<brief>VGETMANTSD--Extract Float64 of Normalized Mantissas from Float64 Scalar.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGETMANTSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F3A.W1 27 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Extract the normalized mantissa of the low float64 element in xmm3/m64 using imm8 for sign control and mantissa interval normalization. Store the mantissa to xmm1 under the writemask k1 and merge with the other elements of xmm2.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!-- Scalar float32 normalized-mantissa extraction: a single EVEX.NDS.LIG entry (cpuid flag AVX512F, opcode 27, W0). The entry takes imm8 as its fourth argument and its opcode ends in ib, so the T1S table records Imm8 as operand 4 (it previously read NA). -->
	<common>
		<brief>VGETMANTSS--Extract Float32 Vector of Normalized Mantissa from Float32 Vector.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGETMANTSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F3A.W0 27 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Extract the normalized mantissa from the low float32 element of xmm3/m32 using imm8 for sign control and mantissa interval normalization, store the mantissa to xmm1 under the writemask k1 and merge with the other elements of xmm2.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!-- Floating-point lane insertion. One VEX entry (VINSERTF128, cpuid flag AVX, openc RVMI) plus six EVEX entries: the 128-bit inserts use opcode 18 (32X4 with W0 and openc T4, 64X2 with W1 and openc T2) and the 256-bit inserts use opcode 1A (32X8 with W0 and openc T8, 64X4 with W1 and openc T4). All entries take a trailing imm8, which each table lists as operand 4. Text fix: "floatingpoint" is now "floating-point" in the six EVEX descriptions. -->
	<common>
		<brief>VINSERTF128/VINSERTF32x4/VINSERTF64x2/VINSERTF32x8/VINSERTF64x4--Insert Packed Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTF128</mnem>
			<args>ymm1,ymm2,xmm3/m128,imm8</args>
			<opc openc="RVMI">VEX.NDS.256.66.0F3A.W0 18 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Insert 128 bits of packed floating-point values from xmm3/m128 and the remaining values from ymm2 into ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTF32X4</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128,imm8</args>
			<opc openc="T4">EVEX.NDS.256.66.0F3A.W0 18 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Insert 128 bits of packed single-precision floating-point values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTF32X4</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128,imm8</args>
			<opc openc="T4">EVEX.NDS.512.66.0F3A.W0 18 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Insert 128 bits of packed single-precision floating-point values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTF64X2</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128,imm8</args>
			<opc openc="T2">EVEX.NDS.256.66.0F3A.W1 18 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Insert 128 bits of packed double-precision floating-point values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTF64X2</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128,imm8</args>
			<opc openc="T2">EVEX.NDS.512.66.0F3A.W1 18 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Insert 128 bits of packed double-precision floating-point values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTF32X8</mnem>
			<args>zmm1 {k1}{z},zmm2,ymm3/m256,imm8</args>
			<opc openc="T8">EVEX.NDS.512.66.0F3A.W0 1A /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Insert 256 bits of packed single-precision floating-point values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTF64X4</mnem>
			<args>zmm1 {k1}{z},zmm2,ymm3/m256,imm8</args>
			<opc openc="T4">EVEX.NDS.512.66.0F3A.W1 1A /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Insert 256 bits of packed double-precision floating-point values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1.</dscrp>
		</ins>
		<!-- The four tables share one layout; RVMI takes operand 2 from VEX.vvvv, the T2/T4/T8 tables from EVEX.vvvv. -->
		<oprndenc openc="RVMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<oprndenc openc="T2">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<oprndenc openc="T4">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<oprndenc openc="T8">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!-- VINSERTI128 / VINSERTI32X4 / VINSERTI64X2 / VINSERTI32X8 / VINSERTI64X4.
	     Integer counterpart of the VINSERTF group with the same layout: one VEX form
	     (flag AVX2) and six EVEX forms with {k1}{z} masking. Opcode byte 38 is used by
	     the 128-bit inserts (xmm3/m128 source) and 3A by the 256-bit inserts
	     (ymm3/m256 source); W0 entries are the 32Xn mnemonics and W1 entries the 64Xn
	     mnemonics. The openc attribute of each opc names one of the oprndenc tables
	     that close this group (RVMI, T2, T4, T8). -->
	<common>
		<brief>VINSERTI128/VINSERTI32x4/VINSERTI64x2/VINSERTI32x8/VINSERTI64x4--Insert Packed Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTI128</mnem>
			<args>ymm1,ymm2,xmm3/m128,imm8</args>
			<opc openc="RVMI">VEX.NDS.256.66.0F3A.W0 38 /r ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Insert 128 bits of integer data from xmm3/m128 and the remaining values from ymm2 into ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTI32X4</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128,imm8</args>
			<opc openc="T4">EVEX.NDS.256.66.0F3A.W0 38 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Insert 128 bits of packed doubleword integer values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTI32X4</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128,imm8</args>
			<opc openc="T4">EVEX.NDS.512.66.0F3A.W0 38 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Insert 128 bits of packed doubleword integer values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTI64X2</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128,imm8</args>
			<opc openc="T2">EVEX.NDS.256.66.0F3A.W1 38 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Insert 128 bits of packed quadword integer values from xmm3/m128 and the remaining values from ymm2 into ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTI64X2</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128,imm8</args>
			<opc openc="T2">EVEX.NDS.512.66.0F3A.W1 38 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Insert 128 bits of packed quadword integer values from xmm3/m128 and the remaining values from zmm2 into zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTI32X8</mnem>
			<args>zmm1 {k1}{z},zmm2,ymm3/m256,imm8</args>
			<opc openc="T8">EVEX.NDS.512.66.0F3A.W0 3A /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Insert 256 bits of packed doubleword integer values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTI64X4</mnem>
			<args>zmm1 {k1}{z},zmm2,ymm3/m256,imm8</args>
			<opc openc="T4">EVEX.NDS.512.66.0F3A.W1 3A /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Insert 256 bits of packed quadword integer values from ymm3/m256 and the remaining values from zmm2 into zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<oprndenc openc="T2">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<oprndenc openc="T4">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<oprndenc openc="T8">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!-- INSERTPS / VINSERTPS.
	     Three encodings of opcode 21 with an imm8: legacy (flag SSE4_1, table RMI),
	     VEX.128 (flag AVX, table RVMI) and EVEX.128 (flag AVX512F, table T1S).
	     In the RMI table operand 1 is ModRM:reg(r,w) and imm8 is operand 3; in the
	     RVMI and T1S tables operand 1 is ModRM:reg(w), operand 2 comes from vvvv and
	     imm8 is operand 4. The EVEX form lists no {k1}{z} in its args. -->
	<common>
		<brief>INSERTPS--Insert Scalar Single-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>INSERTPS</mnem>
			<args>xmm1,xmm2/m32,imm8</args>
			<opc openc="RMI">66 0F 3A 21 /r ib</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Insert a single-precision floating-point value selected by imm8 from xmm2/m32 into xmm1 at the specified destination element specified by imm8 and zero out destination elements in xmm1 as indicated in imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTPS</mnem>
			<args>xmm1,xmm2,xmm3/m32,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.66.0F3A.WIG 21 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Insert a single-precision floating-point value selected by imm8 from xmm3/m32 and merge with values in xmm2 at the specified destination element specified by imm8 and write out the result and zero out destination elements in xmm1 as indicated in imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VINSERTPS</mnem>
			<args>xmm1,xmm2,xmm3/m32,imm8</args>
			<opc openc="T1S">EVEX.NDS.128.66.0F3A.W0 21 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Insert a single-precision floating-point value selected by imm8 from xmm3/m32 and merge with values in xmm2 at the specified destination element specified by imm8 and write out the result and zero out destination elements in xmm1 as indicated in imm8.</dscrp>
		</ins>
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!-- MAXPD / VMAXPD.
	     Six encodings of opcode 5F with the 66 prefix: legacy (flag SSE2, table RM),
	     VEX.128 and VEX.256 (flag AVX, table RVM), and EVEX.128/256/512 (table FV,
	     W1) with {k1}{z} masking and an m64bcst memory form. The EVEX 128 and 256
	     entries list both AVX512VL and AVX512F; the 512 entry lists AVX512F only and
	     is the only one whose args carry {sae}. -->
	<common>
		<brief>MAXPD--Maximum of Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MAXPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 5F /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Return the maximum double-precision floating-point values between xmm1 and xmm2/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 5F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the maximum double-precision floating-point values between xmm2 and xmm3/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 5F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the maximum packed double-precision floating-point values between ymm2 and ymm3/m256.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 5F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the maximum packed double-precision floating-point values between xmm2 and xmm3/m128/m64bcst and store result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 5F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the maximum packed double-precision floating-point values between ymm2 and ymm3/m256/m64bcst and store result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 5F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the maximum packed double-precision floating-point values between zmm2 and zmm3/m512/m64bcst and store result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MAXPS / VMAXPS.
	     Six encodings of opcode 5F with no mandatory prefix: legacy (flag SSE,
	     table RM), VEX.128 and VEX.256 (flag AVX, table RVM), and EVEX.128/256/512
	     (table FV, W0) with {k1}{z} masking and an m32bcst memory form. The EVEX 128
	     and 256 entries list both AVX512VL and AVX512F; the 512 entry lists AVX512F
	     only and is the only one whose args carry {sae}. -->
	<common>
		<brief>MAXPS--Maximum of Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MAXPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 5F /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Return the maximum single-precision floating-point values between xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 5F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the maximum single-precision floating-point values between xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F.WIG 5F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the maximum single-precision floating-point values between ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 5F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the maximum packed single-precision floating-point values between xmm2 and xmm3/m128/m32bcst and store result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 5F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the maximum packed single-precision floating-point values between ymm2 and ymm3/m256/m32bcst and store result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae}</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 5F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the maximum packed single-precision floating-point values between zmm2 and zmm3/m512/m32bcst and store result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MAXSD / VMAXSD.
	     Three encodings of opcode 5F with the F2 prefix: legacy (flag SSE2, table RM),
	     VEX.128 (flag AVX, table RVM) and EVEX (flag AVX512F, table T1S, LIG, W1).
	     The memory operand is m64 in every form; only the EVEX form has {k1}{z}
	     masking and {sae} in its args. -->
	<common>
		<brief>MAXSD--Return Maximum Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MAXSD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F2 0F 5F /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Return the maximum scalar double-precision floating-point value between xmm2/m64 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXSD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.NDS.128.F2.0F.WIG 5F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the maximum scalar double-precision floating-point value between xmm3/m64 and xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 5F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the maximum scalar double-precision floating-point value between xmm3/m64 and xmm2.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MAXSS / VMAXSS.
	     Three encodings of opcode 5F with the F3 prefix: legacy (flag SSE, table RM),
	     VEX.128 (flag AVX, table RVM) and EVEX (flag AVX512F, table T1S, LIG, W0).
	     The memory operand is m32 in every form; only the EVEX form has {k1}{z}
	     masking and {sae} in its args. -->
	<common>
		<brief>MAXSS--Return Maximum Scalar Single-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MAXSS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">F3 0F 5F /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Return the maximum scalar single-precision floating-point value between xmm2/m32 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXSS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.NDS.128.F3.0F.WIG 5F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the maximum scalar single-precision floating-point value between xmm3/m32 and xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMAXSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 5F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the maximum scalar single-precision floating-point value between xmm3/m32 and xmm2.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MINPD / VMINPD.
	     Six encodings of opcode 5D with the 66 prefix: legacy (flag SSE2, table RM),
	     VEX.128 and VEX.256 (flag AVX, table RVM), and EVEX.128/256/512 (table FV,
	     W1) with {k1}{z} masking and an m64bcst memory form. The EVEX 128 and 256
	     entries list both AVX512VL and AVX512F; the 512 entry lists AVX512F only and
	     is the only one whose args carry {sae}. -->
	<common>
		<brief>MINPD--Minimum of Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MINPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 5D /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Return the minimum double-precision floating-point values between xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 5D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the minimum double-precision floating-point values between xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 5D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the minimum packed double-precision floating-point values between ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 5D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the minimum packed double-precision floating-point values between xmm2 and xmm3/m128/m64bcst and store result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 5D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the minimum packed double-precision floating-point values between ymm2 and ymm3/m256/m64bcst and store result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 5D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the minimum packed double-precision floating-point values between zmm2 and zmm3/m512/m64bcst and store result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MINPS / VMINPS.
	     Six encodings of opcode 5D with no mandatory prefix: legacy (flag SSE,
	     table RM), VEX.128 and VEX.256 (flag AVX, table RVM), and EVEX.128/256/512
	     (table FV, W0) with {k1}{z} masking and an m32bcst memory form. The EVEX 128
	     and 256 entries list both AVX512VL and AVX512F; the 512 entry lists AVX512F
	     only and is the only one whose args carry {sae}. -->
	<common>
		<brief>MINPS--Minimum of Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MINPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 5D /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Return the minimum single-precision floating-point values between xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 5D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the minimum single-precision floating-point values between xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F.WIG 5D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the minimum single-precision floating-point values between ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 5D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the minimum packed single-precision floating-point values between xmm2 and xmm3/m128/m32bcst and store result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 5D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the minimum packed single-precision floating-point values between ymm2 and ymm3/m256/m32bcst and store result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae}</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 5D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the minimum packed single-precision floating-point values between zmm2 and zmm3/m512/m32bcst and store result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MINSD / VMINSD.
	     Three encodings of opcode 5D with the F2 prefix: legacy (flag SSE2, table RM),
	     VEX.128 (flag AVX, table RVM) and EVEX (flag AVX512F, table T1S, LIG, W1).
	     The memory operand is m64 in every form; only the EVEX form has {k1}{z}
	     masking and {sae} in its args. -->
	<common>
		<brief>MINSD--Return Minimum Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MINSD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F2 0F 5D /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Return the minimum scalar double-precision floating-point value between xmm2/m64 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINSD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.NDS.128.F2.0F.WIG 5D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the minimum scalar double-precision floating-point value between xmm3/m64 and xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 5D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the minimum scalar double-precision floating-point value between xmm3/m64 and xmm2.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MINSS / VMINSS.
	     Three encodings of opcode 5D with the F3 prefix: legacy (flag SSE, table RM),
	     VEX.128 (flag AVX, table RVM) and EVEX (flag AVX512F, table T1S, LIG, W0).
	     The memory operand is m32 in every form; only the EVEX form has {k1}{z}
	     masking and {sae} in its args. -->
	<common>
		<brief>MINSS--Return Minimum Scalar Single-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MINSS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">F3 0F 5D /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Return the minimum scalar single-precision floating-point value between xmm2/m32 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINSS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.NDS.128.F3.0F.WIG 5D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the minimum scalar single-precision floating-point value between xmm3/m32 and xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMINSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 5D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Return the minimum scalar single-precision floating-point value between xmm3/m32 and xmm2.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MOVAPD / VMOVAPD.
	     Twelve encodings with the 66 prefix: opcode 28 entries have the register as
	     first argument (source xmm2/mem per their dscrp) and opcode 29 entries have
	     the r/m operand as first argument. Legacy forms list SSE2, VEX.128/256 forms
	     list AVX, and EVEX.128/256/512 forms (W1, {k1}{z}) list AVX512F, with
	     AVX512VL added on the 128 and 256 entries.
	     NOTE(review): the EVEX opc entries use openc="FVM" while the tables that close
	     this group are keyed FVM-RM and FVM-MR. Presumably opcode 28 maps to FVM-RM
	     and opcode 29 to FVM-MR; confirm how consumers resolve the key before
	     renaming either side. -->
	<common>
		<brief>MOVAPD--Move Aligned Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVAPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 28 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from xmm2/mem to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVAPD</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">66 0F 29 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from xmm1 to xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.66.0F.WIG 28 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from xmm2/mem to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPD</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">VEX.128.66.0F.WIG 29 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from xmm1 to xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPD</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.66.0F.WIG 28 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from ymm2/mem to ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPD</mnem>
			<args>ymm2/m256,ymm1</args>
			<opc openc="MR">VEX.256.66.0F.WIG 29 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from ymm1 to ymm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.66.0F.W1 28 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.66.0F.W1 28 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.66.0F.W1 28 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPD</mnem>
			<args>xmm2/m128 {k1}{z},xmm1</args>
			<opc openc="FVM">EVEX.128.66.0F.W1 29 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from xmm1 to xmm2/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPD</mnem>
			<args>ymm2/m256 {k1}{z},ymm1</args>
			<opc openc="FVM">EVEX.256.66.0F.W1 29 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from ymm1 to ymm2/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPD</mnem>
			<args>zmm2/m512 {k1}{z},zmm1</args>
			<opc openc="FVM">EVEX.512.66.0F.W1 29 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed double-precision floating-point values from zmm1 to zmm2/m512 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MOVAPS / VMOVAPS.
	     Twelve encodings with no mandatory prefix: opcode 28 entries have the register
	     as first argument (source xmm2/mem per their dscrp) and opcode 29 entries have
	     the r/m operand as first argument. Legacy forms list SSE, VEX.128/256 forms
	     list AVX, and EVEX.128/256/512 forms (W0, {k1}{z}) list AVX512F, with
	     AVX512VL added on the 128 and 256 entries.
	     NOTE(review): the EVEX opc entries use openc="FVM" while the tables that close
	     this group are keyed FVM-RM and FVM-MR. Presumably opcode 28 maps to FVM-RM
	     and opcode 29 to FVM-MR; confirm how consumers resolve the key before
	     renaming either side. -->
	<common>
		<brief>MOVAPS--Move Aligned Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVAPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 28 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from xmm2/mem to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVAPS</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">0F 29 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from xmm1 to xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.0F.WIG 28 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from xmm2/mem to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPS</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">VEX.128.0F.WIG 29 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from xmm1 to xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPS</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.0F.WIG 28 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from ymm2/mem to ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPS</mnem>
			<args>ymm2/m256,ymm1</args>
			<opc openc="MR">VEX.256.0F.WIG 29 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from ymm1 to ymm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.0F.W0 28 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.0F.W0 28 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.0F.W0 28 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPS</mnem>
			<args>xmm2/m128 {k1}{z},xmm1</args>
			<opc openc="FVM">EVEX.128.0F.W0 29 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from xmm1 to xmm2/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPS</mnem>
			<args>ymm2/m256 {k1}{z},ymm1</args>
			<opc openc="FVM">EVEX.256.0F.W0 29 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from ymm1 to ymm2/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVAPS</mnem>
			<args>zmm2/m512 {k1}{z},zmm1</args>
			<opc openc="FVM">EVEX.512.0F.W0 29 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed single-precision floating-point values from zmm1 to zmm2/m512 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>MOVD/MOVQ--Move Doubleword and Quadword.</brief>
		<!-- Legacy SSE2 load form (opcode 6E): xmm1 is the destination in ModRM:reg and r/m32 the source, i.e. the RM operand layout. -->
		<ins x32m="V" x64m="V">
			<mnem>MOVD</mnem>
			<args>xmm1,r32/m32</args>
			<opc openc="RM">66 0F 6E /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move doubleword from r/m32 to xmm1.</dscrp>
		</ins>
		<!-- Legacy SSE2 load form with REX.W (opcode 6E): xmm1 is the destination in ModRM:reg and r/m64 the source, i.e. the RM operand layout. -->
		<ins x32m="NE" x64m="V">
			<mnem>MOVQ</mnem>
			<args>xmm1,r64/m64</args>
			<opc openc="RM">66 REX.W 0F 6E /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move quadword from r/m64 to xmm1.</dscrp>
		</ins>
		<!-- VEX load form (opcode 6E, W0): xmm1 is the destination in ModRM:reg and r/m32 the source, i.e. the RM operand layout. -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVD</mnem>
			<args>xmm1,r32/m32</args>
			<opc openc="RM">VEX.128.66.0F.W0 6E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move doubleword from r/m32 to xmm1.</dscrp>
		</ins>
		<!-- VEX load form (opcode 6E, W1): xmm1 is the destination in ModRM:reg and r/m64 the source, i.e. the RM operand layout. -->
		<ins x32m="NE1" x64m="V">
			<mnem>VMOVQ</mnem>
			<args>xmm1,r64/m64</args>
			<opc openc="RM">VEX.128.66.0F.W1 6E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move quadword from r/m64 to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVD</mnem>
			<args>xmm1,r32/m32</args>
			<opc openc="T1S">EVEX.128.66.0F.W0 6E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move doubleword from r/m32 to xmm1.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VMOVQ</mnem>
			<args>xmm1,r64/m64</args>
			<opc openc="T1S">EVEX.128.66.0F.W1 6E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move quadword from r/m64 to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVD</mnem>
			<args>r32/m32,xmm1</args>
			<opc openc="MR">66 0F 7E /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move doubleword from xmm1 register to r/m32.</dscrp>
		</ins>
		<ins x32m="NE" x64m="V">
			<mnem>MOVQ</mnem>
			<args>r64/m64,xmm1</args>
			<opc openc="MR">66 REX.W 0F 7E /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move quadword from xmm1 register to r/m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVD</mnem>
			<args>r32/m32,xmm1</args>
			<opc openc="MR">VEX.128.66.0F.W0 7E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move doubleword from xmm1 register to r/m32.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VMOVQ</mnem>
			<args>r64/m64,xmm1</args>
			<opc openc="MR">VEX.128.66.0F.W1 7E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move quadword from xmm1 register to r/m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVD</mnem>
			<args>r32/m32,xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F.W0 7E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move doubleword from xmm1 register to r/m32.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VMOVQ</mnem>
			<args>r64/m64,xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F.W1 7E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move quadword from xmm1 register to r/m64.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MOVQ group: quadword moves between xmm registers and xmm/m64.
	     Load forms use opcode 7E and store forms use opcode D6; each direction is listed as a
	     legacy SSE2 entry, a VEX (AVX) entry and an EVEX (AVX512F) entry.
	     Legacy and VEX loads are tagged RM, legacy and VEX stores are tagged MR.
	     NOTE(review): both EVEX entries are tagged openc="T1S" while the tables at the end of the
	     group are keyed "T1S-RM" and "T1S-MR"; confirm how consumers pick the direction. -->
	<common>
		<brief>MOVQ--Move Quadword.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVQ</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F3 0F 7E /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move quadword from xmm2/m64 to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVQ</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.F3.0F.WIG 7E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move quadword from xmm2/m64 to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVQ</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="T1S">EVEX.128.F3.0F.W1 7E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move quadword from xmm2/m64 to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVQ</mnem>
			<args>xmm1/m64,xmm2</args>
			<opc openc="MR">66 0F D6 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move quadword from xmm2 register to xmm1/m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVQ</mnem>
			<args>xmm1/m64,xmm2</args>
			<opc openc="MR">VEX.128.66.0F.WIG D6 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move quadword from xmm2 register to xmm1/m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVQ</mnem>
			<args>xmm1/m64,xmm2</args>
			<opc openc="T1S">EVEX.128.66.0F.W1 D6 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move quadword from xmm2 register to xmm1/m64.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MOVDDUP group: all entries use opcode 12 with an F2 prefix and load from r/m into the
	     destination register. Listed are one legacy SSE3 entry, two VEX (AVX) entries for
	     xmm and ymm, and three EVEX entries for xmm, ymm and zmm that accept a {k1}{z} writemask.
	     Legacy and VEX entries are tagged RM.
	     NOTE(review): the EVEX entries are tagged openc="DUP" while the matching table at the end
	     of the group is keyed "DUP-RM"; confirm how consumers resolve this. -->
	<common>
		<brief>MOVDDUP--Replicate Double FP Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVDDUP</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F2 0F 12 /r</opc>
			<cpuid>
				<flag>SSE3</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from xmm2/m64 and duplicate into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDDUP</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.F2.0F.WIG 12 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from xmm2/m64 and duplicate into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDDUP</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.F2.0F.WIG 12 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move even index double-precision floating-point values from ymm2/mem and duplicate each element into ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDDUP</mnem>
			<args>xmm1 {k1}{z},xmm2/m64</args>
			<opc openc="DUP">EVEX.128.F2.0F.W1 12 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from xmm2/m64 and duplicate each element into xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDDUP</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="DUP">EVEX.256.F2.0F.W1 12 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move even index double-precision floating-point values from ymm2/m256 and duplicate each element into ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDDUP</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="DUP">EVEX.512.F2.0F.W1 12 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move even index double-precision floating-point values from zmm2/m512 and duplicate each element into zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="DUP-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>MOVDQA,VMOVDQA32/64--Move Aligned Packed Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVDQA</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 6F /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move aligned packed integer values from xmm2/mem to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVDQA</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">66 0F 7F /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move aligned packed integer values from xmm1 to xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.66.0F.WIG 6F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed integer values from xmm2/mem to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">VEX.128.66.0F.WIG 7F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed integer values from xmm1 to xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.66.0F.WIG 6F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed integer values from ymm2/mem to ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA</mnem>
			<args>ymm2/m256,ymm1</args>
			<opc openc="MR">VEX.256.66.0F.WIG 7F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move aligned packed integer values from ymm1 to ymm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA32</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.66.0F.W0 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed doubleword integer values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA32</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.66.0F.W0 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed doubleword integer values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA32</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.66.0F.W0 6F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed doubleword integer values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA32</mnem>
			<args>xmm2/m128 {k1}{z},xmm1</args>
			<opc openc="FVM">EVEX.128.66.0F.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed doubleword integer values from xmm1 to xmm2/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA32</mnem>
			<args>ymm2/m256 {k1}{z},ymm1</args>
			<opc openc="FVM">EVEX.256.66.0F.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed doubleword integer values from ymm1 to ymm2/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA32</mnem>
			<args>zmm2/m512 {k1}{z},zmm1</args>
			<opc openc="FVM">EVEX.512.66.0F.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed doubleword integer values from zmm1 to zmm2/m512 using writemask k1.</dscrp>
		</ins>
		<!-- EVEX.128 load form of VMOVDQA64 (opcode 6F, W1); description worded like the other VMOVDQA64 entries ("packed quadword"). -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA64</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.66.0F.W1 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed quadword integer values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<!-- EVEX.256 load form of VMOVDQA64 (opcode 6F, W1); description worded like the other VMOVDQA64 entries ("packed quadword"). -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA64</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.66.0F.W1 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed quadword integer values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA64</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.66.0F.W1 6F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed quadword integer values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA64</mnem>
			<args>xmm2/m128 {k1}{z},xmm1</args>
			<opc openc="FVM">EVEX.128.66.0F.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed quadword integer values from xmm1 to xmm2/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA64</mnem>
			<args>ymm2/m256 {k1}{z},ymm1</args>
			<opc openc="FVM">EVEX.256.66.0F.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed quadword integer values from ymm1 to ymm2/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQA64</mnem>
			<args>zmm2/m512 {k1}{z},zmm1</args>
			<opc openc="FVM">EVEX.512.66.0F.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move aligned packed quadword integer values from zmm1 to zmm2/m512 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>MOVDQU,VMOVDQU8/16/32/64--Move Unaligned Packed Integer Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVDQU</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">F3 0F 6F /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move unaligned packed integer values from xmm2/m128 to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVDQU</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">F3 0F 7F /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move unaligned packed integer values from xmm1 to xmm2/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.F3.0F.WIG 6F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed integer values from xmm2/m128 to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">VEX.128.F3.0F.WIG 7F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed integer values from xmm1 to xmm2/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.F3.0F.WIG 6F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed integer values from ymm2/m256 to ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU</mnem>
			<args>ymm2/m256,ymm1</args>
			<opc openc="MR">VEX.256.F3.0F.WIG 7F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed integer values from ymm1 to ymm2/m256.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU8</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.F2.0F.W0 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed byte integer values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU8</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.F2.0F.W0 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed byte integer values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU8</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.F2.0F.W0 6F /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed byte integer values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU8</mnem>
			<args>xmm2/m128 {k1}{z},xmm1</args>
			<opc openc="FVM">EVEX.128.F2.0F.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed byte integer values from xmm1 to xmm2/m128 using writemask k1.</dscrp>
		</ins>
		<!-- EVEX.256 store form of VMOVDQU8 (opcode 7F): ymm1 is written to ymm2/m256 under writemask k1. -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU8</mnem>
			<args>ymm2/m256 {k1}{z},ymm1</args>
			<opc openc="FVM">EVEX.256.F2.0F.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed byte integer values from ymm1 to ymm2/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU8</mnem>
			<args>zmm2/m512 {k1}{z},zmm1</args>
			<opc openc="FVM">EVEX.512.F2.0F.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed byte integer values from zmm1 to zmm2/m512 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU16</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.F2.0F.W1 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed word integer values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU16</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.F2.0F.W1 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed word integer values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU16</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.F2.0F.W1 6F /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed word integer values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU16</mnem>
			<args>xmm2/m128 {k1}{z},xmm1</args>
			<opc openc="FVM">EVEX.128.F2.0F.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed word integer values from xmm1 to xmm2/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU16</mnem>
			<args>ymm2/m256 {k1}{z},ymm1</args>
			<opc openc="FVM">EVEX.256.F2.0F.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed word integer values from ymm1 to ymm2/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU16</mnem>
			<args>zmm2/m512 {k1}{z},zmm1</args>
			<opc openc="FVM">EVEX.512.F2.0F.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move unaligned packed word integer values from zmm1 to zmm2/m512 using writemask k1.</dscrp>
		</ins>
		<!-- EVEX.128 load form of VMOVDQU32 (opcode 6F): the source operand is xmm2/m128, as stated in the description. -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU32</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.F3.0F.W0 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed doubleword integer values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU32</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.F3.0F.W0 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed doubleword integer values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<!-- EVEX.512 load form of VMOVDQU32 (opcode 6F): zmm2/m512 is loaded into zmm1 under writemask k1. -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU32</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.F3.0F.W0 6F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed doubleword integer values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU32</mnem>
			<args>xmm2/m128 {k1}{z},xmm1</args>
			<opc openc="FVM">EVEX.128.F3.0F.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed doubleword integer values from xmm1 to xmm2/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU32</mnem>
			<args>ymm2/m256 {k1}{z},ymm1</args>
			<opc openc="FVM">EVEX.256.F3.0F.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed doubleword integer values from ymm1 to ymm2/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU32</mnem>
			<args>zmm2/m512 {k1}{z},zmm1</args>
			<opc openc="FVM">EVEX.512.F3.0F.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed doubleword integer values from zmm1 to zmm2/m512 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU64</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.F3.0F.W1 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed quadword integer values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU64</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.F3.0F.W1 6F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed quadword integer values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU64</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.F3.0F.W1 6F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed quadword integer values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU64</mnem>
			<args>xmm2/m128 {k1}{z},xmm1</args>
			<opc openc="FVM">EVEX.128.F3.0F.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed quadword integer values from xmm1 to xmm2/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU64</mnem>
			<args>ymm2/m256 {k1}{z},ymm1</args>
			<opc openc="FVM">EVEX.256.F3.0F.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed quadword integer values from ymm1 to ymm2/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVDQU64</mnem>
			<args>zmm2/m512 {k1}{z},zmm1</args>
			<opc openc="FVM">EVEX.512.F3.0F.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed quadword integer values from zmm1 to zmm2/m512 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MOVHLPS group: register-to-register forms only, all on opcode 12.
	     The legacy SSE entry takes two operands and is tagged RM; the VEX (AVX) and EVEX (AVX512F)
	     entries take three operands and are tagged RVM. The two tables at the end of the group
	     give the operand layout for the RM and RVM keys. -->
	<common>
		<brief>MOVHLPS--Move Packed Single-Precision Floating-Point Values High to Low.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVHLPS</mnem>
			<args>xmm1,xmm2</args>
			<opc openc="RM">0F 12 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move two packed single-precision floating-point values from high quadword of xmm2 to low quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVHLPS</mnem>
			<args>xmm1,xmm2,xmm3</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 12 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Merge two packed single-precision floating-point values from high quadword of xmm3 and low quadword of xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVHLPS</mnem>
			<args>xmm1,xmm2,xmm3</args>
			<opc openc="RVM">EVEX.NDS.128.0F.W0 12 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Merge two packed single-precision floating-point values from high quadword of xmm3 and low quadword of xmm2.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MOVHPD group: m64 loads into the high quadword (opcode 16) and stores from the high
	     quadword to m64 (opcode 17), each listed as a legacy SSE2, a VEX (AVX) and an EVEX
	     (AVX512F) entry. Loads are tagged RM (legacy, destination is read and written), RVM (VEX)
	     and T1S (EVEX); legacy and VEX stores are tagged MR.
	     NOTE(review): the EVEX store entry is also tagged openc="T1S", which matches the
	     three-operand "T1S" table rather than the two-operand "T1S-MR" table; confirm how
	     consumers resolve this. -->
	<common>
		<brief>MOVHPD--Move High Packed Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVHPD</mnem>
			<args>xmm1,m64</args>
			<opc openc="RM">66 0F 16 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from m64 to high quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVHPD</mnem>
			<args>xmm2,xmm1,m64</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 16 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Merge double-precision floating-point value from m64 and the low quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVHPD</mnem>
			<args>xmm2,xmm1,m64</args>
			<opc openc="T1S">EVEX.NDS.128.66.0F.W1 16 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Merge double-precision floating-point value from m64 and the low quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVHPD</mnem>
			<args>m64,xmm1</args>
			<opc openc="MR">66 0F 17 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from high quadword of xmm1 to m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVHPD</mnem>
			<args>m64,xmm1</args>
			<opc openc="MR">VEX.128.66.0F.WIG 17 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from high quadword of xmm1 to m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVHPD</mnem>
			<args>m64,xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F.W1 17 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from high quadword of xmm1 to m64.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MOVHPS group: m64 loads into the high quadword (opcode 16) and stores from the high
	     quadword to m64 (opcode 17), each listed as a legacy SSE, a VEX (AVX) and an EVEX
	     (AVX512F) entry. Loads are tagged RM (legacy, destination is read and written), RVM (VEX)
	     and T2 (EVEX); legacy and VEX stores are tagged MR.
	     NOTE(review): the EVEX store entry is also tagged openc="T2", which matches the
	     three-operand "T2" table rather than the two-operand "T2-MR" table; confirm how
	     consumers resolve this. -->
	<common>
		<brief>MOVHPS--Move High Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVHPS</mnem>
			<args>xmm1,m64</args>
			<opc openc="RM">0F 16 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move two packed single-precision floating-point values from m64 to high quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVHPS</mnem>
			<args>xmm2,xmm1,m64</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 16 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Merge two packed single-precision floating-point values from m64 and the low quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVHPS</mnem>
			<args>xmm2,xmm1,m64</args>
			<opc openc="T2">EVEX.NDS.128.0F.W0 16 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Merge two packed single-precision floating-point values from m64 and the low quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVHPS</mnem>
			<args>m64,xmm1</args>
			<opc openc="MR">0F 17 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move two packed single-precision floating-point values from high quadword of xmm1 to m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVHPS</mnem>
			<args>m64,xmm1</args>
			<opc openc="MR">VEX.128.0F.WIG 17 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move two packed single-precision floating-point values from high quadword of xmm1 to m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVHPS</mnem>
			<args>m64,xmm1</args>
			<opc openc="T2">EVEX.128.0F.W0 17 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move two packed single-precision floating-point values from high quadword of xmm1 to m64.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T2">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T2-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MOVLHPS group: register-to-register forms only, all on opcode 16.
	     The legacy SSE entry takes two operands and is tagged RM; the VEX (AVX) and EVEX (AVX512F)
	     entries take three operands and are tagged RVM. The two tables at the end of the group
	     give the operand layout for the RM and RVM keys. -->
	<common>
		<brief>MOVLHPS--Move Packed Single-Precision Floating-Point Values Low to High.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVLHPS</mnem>
			<args>xmm1,xmm2</args>
			<opc openc="RM">0F 16 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move two packed single-precision floating-point values from low quadword of xmm2 to high quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVLHPS</mnem>
			<args>xmm1,xmm2,xmm3</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 16 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Merge two packed single-precision floating-point values from low quadword of xmm3 and low quadword of xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVLHPS</mnem>
			<args>xmm1,xmm2,xmm3</args>
			<opc openc="RVM">EVEX.NDS.128.0F.W0 16 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Merge two packed single-precision floating-point values from low quadword of xmm3 and low quadword of xmm2.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVLPD / VMOVLPD: load forms (opcode 12) and store forms (opcode 13) in legacy SSE2,
		     VEX (AVX) and EVEX (AVX512F) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands:
		     loads use RM / RVM / T1S, stores use MR / T1S-MR. -->
		<brief>MOVLPD--Move Low Packed Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVLPD</mnem>
			<args>xmm1,m64</args>
			<opc openc="RM">66 0F 12 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from m64 to low quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVLPD</mnem>
			<args>xmm2,xmm1,m64</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 12 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Merge double-precision floating-point value from m64 and the high quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVLPD</mnem>
			<args>xmm2,xmm1,m64</args>
			<opc openc="T1S">EVEX.NDS.128.66.0F.W1 12 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Merge double-precision floating-point value from m64 and the high quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVLPD</mnem>
			<args>m64,xmm1</args>
			<opc openc="MR">66 0F 13 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from low quadword of xmm1 to m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVLPD</mnem>
			<args>m64,xmm1</args>
			<opc openc="MR">VEX.128.66.0F.WIG 13 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from low quadword of xmm1 to m64.</dscrp>
		</ins>
		<!-- EVEX store: the destination is memory (ModRM:r/m), so it uses T1S-MR, not the load table T1S. -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVLPD</mnem>
			<args>m64,xmm1</args>
			<opc openc="T1S-MR">EVEX.128.66.0F.W1 13 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move double-precision floating-point value from low quadword of xmm1 to m64.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- RVM: the destination register lives in ModRM:reg; ModRM:r/m carries only the m64 source. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- T1S: EVEX load form. -->
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- T1S-MR: EVEX store form. -->
		<oprndenc openc="T1S-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVLPS / VMOVLPS: load forms (opcode 12) and store forms (opcode 13) in legacy SSE,
		     VEX (AVX) and EVEX (AVX512F) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands:
		     loads use RM / RVM / T2, stores use MR / T2-MR. -->
		<brief>MOVLPS--Move Low Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVLPS</mnem>
			<args>xmm1,m64</args>
			<opc openc="RM">0F 12 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move two packed single-precision floating-point values from m64 to low quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVLPS</mnem>
			<args>xmm2,xmm1,m64</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 12 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Merge two packed single-precision floating-point values from m64 and the high quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVLPS</mnem>
			<args>xmm2,xmm1,m64</args>
			<opc openc="T2">EVEX.NDS.128.0F.W0 12 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Merge two packed single-precision floating-point values from m64 and the high quadword of xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVLPS</mnem>
			<args>m64,xmm1</args>
			<opc openc="MR">0F 13 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move two packed single-precision floating-point values from low quadword of xmm1 to m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVLPS</mnem>
			<args>m64,xmm1</args>
			<opc openc="MR">VEX.128.0F.WIG 13 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move two packed single-precision floating-point values from low quadword of xmm1 to m64.</dscrp>
		</ins>
		<!-- EVEX store: the destination is memory (ModRM:r/m), so it uses T2-MR, not the load table T2. -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVLPS</mnem>
			<args>m64,xmm1</args>
			<opc openc="T2-MR">EVEX.128.0F.W0 13 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move two packed single-precision floating-point values from low quadword of xmm1 to m64.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- T2: EVEX load form. -->
		<oprndenc openc="T2">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- T2-MR: EVEX store form. -->
		<oprndenc openc="T2-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVNTDQA / VMOVNTDQA: non-temporal aligned loads in legacy SSE4_1, VEX (AVX / AVX2)
		     and EVEX (AVX512F, plus AVX512VL for the 128- and 256-bit forms) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands. -->
		<brief>MOVNTDQA--Load Double Quadword Non-Temporal Aligned Hint.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVNTDQA</mnem>
			<args>xmm1,m128</args>
			<opc openc="RM">66 0F 38 2A /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Move double quadword from m128 to xmm1 using nontemporal hint if WC memory type.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTDQA</mnem>
			<args>xmm1,m128</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 2A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move double quadword from m128 to xmm using nontemporal hint if WC memory type.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTDQA</mnem>
			<args>ymm1,m256</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 2A /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Move 256-bit data from m256 to ymm using non-temporal hint if WC memory type.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTDQA</mnem>
			<args>xmm1,m128</args>
			<opc openc="FVM">EVEX.128.66.0F38.W0 2A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move 128-bit data from m128 to xmm using non-temporal hint if WC memory type.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTDQA</mnem>
			<args>ymm1,m256</args>
			<opc openc="FVM">EVEX.256.66.0F38.W0 2A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move 256-bit data from m256 to ymm using non-temporal hint if WC memory type.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTDQA</mnem>
			<args>zmm1,m512</args>
			<opc openc="FVM">EVEX.512.66.0F38.W0 2A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move 512-bit data from m512 to zmm using non-temporal hint if WC memory type.</dscrp>
		</ins>
		<!-- RM: referenced by the legacy and VEX forms. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- FVM: referenced by the EVEX forms; same operand placement as RM. -->
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVNTDQ / VMOVNTDQ: non-temporal integer stores in legacy SSE2, VEX (AVX) and
		     EVEX (AVX512F, plus AVX512VL for the 128- and 256-bit forms) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands. -->
		<brief>MOVNTDQ--Store Packed Integers Using Non-Temporal Hint.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVNTDQ</mnem>
			<args>m128,xmm1</args>
			<opc openc="MR">66 0F E7 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move packed integer values in xmm1 to m128 using nontemporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTDQ</mnem>
			<args>m128,xmm1</args>
			<opc openc="MR">VEX.128.66.0F.WIG E7 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move packed integer values in xmm1 to m128 using nontemporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTDQ</mnem>
			<args>m256,ymm1</args>
			<opc openc="MR">VEX.256.66.0F.WIG E7 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move packed integer values in ymm1 to m256 using nontemporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTDQ</mnem>
			<args>m128,xmm1</args>
			<opc openc="FVM">EVEX.128.66.0F.W0 E7 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move packed integer values in xmm1 to m128 using nontemporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTDQ</mnem>
			<args>m256,ymm1</args>
			<opc openc="FVM">EVEX.256.66.0F.W0 E7 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move packed integer values in ymm1 to m256 using nontemporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTDQ</mnem>
			<args>m512,zmm1</args>
			<opc openc="FVM">EVEX.512.66.0F.W0 E7 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move packed integer values in zmm1 to m512 using nontemporal hint.</dscrp>
		</ins>
		<!-- MR: referenced by the legacy and VEX forms. -->
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- FVM: referenced by the EVEX forms; same operand placement as MR. -->
		<oprndenc openc="FVM">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVNTPD / VMOVNTPD: non-temporal packed double-precision stores in legacy SSE2, VEX (AVX)
		     and EVEX (AVX512F, plus AVX512VL for the 128- and 256-bit forms) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands. -->
		<brief>MOVNTPD--Store Packed Double-Precision Floating-Point Values Using Non-Temporal Hint.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVNTPD</mnem>
			<args>m128,xmm1</args>
			<opc openc="MR">66 0F 2B /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move packed double-precision values in xmm1 to m128 using non-temporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTPD</mnem>
			<args>m128,xmm1</args>
			<opc openc="MR">VEX.128.66.0F.WIG 2B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move packed double-precision values in xmm1 to m128 using non-temporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTPD</mnem>
			<args>m256,ymm1</args>
			<opc openc="MR">VEX.256.66.0F.WIG 2B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move packed double-precision values in ymm1 to m256 using non-temporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTPD</mnem>
			<args>m128,xmm1</args>
			<opc openc="FVM">EVEX.128.66.0F.W1 2B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move packed double-precision values in xmm1 to m128 using non-temporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTPD</mnem>
			<args>m256,ymm1</args>
			<opc openc="FVM">EVEX.256.66.0F.W1 2B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move packed double-precision values in ymm1 to m256 using non-temporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTPD</mnem>
			<args>m512,zmm1</args>
			<opc openc="FVM">EVEX.512.66.0F.W1 2B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move packed double-precision values in zmm1 to m512 using non-temporal hint.</dscrp>
		</ins>
		<!-- MR: referenced by the legacy and VEX forms. -->
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- FVM: referenced by the EVEX forms; same operand placement as MR. -->
		<oprndenc openc="FVM">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVNTPS / VMOVNTPS: non-temporal packed single-precision stores in legacy SSE, VEX (AVX)
		     and EVEX (AVX512F, plus AVX512VL for the 128- and 256-bit forms) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands. -->
		<brief>MOVNTPS--Store Packed Single-Precision Floating-Point Values Using Non-Temporal Hint.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVNTPS</mnem>
			<args>m128,xmm1</args>
			<opc openc="MR">0F 2B /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move packed single-precision values xmm1 to mem using non-temporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTPS</mnem>
			<args>m128,xmm1</args>
			<opc openc="MR">VEX.128.0F.WIG 2B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move packed single-precision values xmm1 to mem using non-temporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTPS</mnem>
			<args>m256,ymm1</args>
			<opc openc="MR">VEX.256.0F.WIG 2B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move packed single-precision values ymm1 to mem using non-temporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTPS</mnem>
			<args>m128,xmm1</args>
			<opc openc="FVM">EVEX.128.0F.W0 2B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move packed single-precision values in xmm1 to m128 using non-temporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTPS</mnem>
			<args>m256,ymm1</args>
			<opc openc="FVM">EVEX.256.0F.W0 2B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move packed single-precision values in ymm1 to m256 using non-temporal hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVNTPS</mnem>
			<args>m512,zmm1</args>
			<opc openc="FVM">EVEX.512.0F.W0 2B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move packed single-precision values in zmm1 to m512 using non-temporal hint.</dscrp>
		</ins>
		<!-- MR: referenced by the legacy and VEX forms. -->
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- FVM: referenced by the EVEX forms; same operand placement as MR. -->
		<oprndenc openc="FVM">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVSD / VMOVSD: scalar double-precision move, load and store forms in legacy SSE2,
		     VEX (AVX) and EVEX (AVX512F) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands.
		     The EVEX memory forms use T1S-RM (load) and T1S-MR (store); no plain "T1S" table exists here. -->
		<brief>MOVSD--Move or Merge Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVSD</mnem>
			<args>xmm1,xmm2</args>
			<opc openc="RM">F2 0F 10 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move scalar double-precision floating-point value from xmm2 to xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVSD</mnem>
			<args>xmm1,m64</args>
			<opc openc="RM">F2 0F 10 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Load scalar double-precision floating-point value from m64 to xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVSD</mnem>
			<args>xmm1/m64,xmm2</args>
			<opc openc="MR">F2 0F 11 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move scalar double-precision floating-point value from xmm2 register to xmm1/m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSD</mnem>
			<args>xmm1,xmm2,xmm3</args>
			<opc openc="RVM">VEX.NDS.LIG.F2.0F.WIG 10 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Merge scalar double-precision floating-point value from xmm2 and xmm3 to xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSD</mnem>
			<args>xmm1,m64</args>
			<opc openc="XM">VEX.LIG.F2.0F.WIG 10 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Load scalar double-precision floating-point value from m64 to xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSD</mnem>
			<args>xmm1,xmm2,xmm3</args>
			<opc openc="MVR">VEX.NDS.LIG.F2.0F.WIG 11 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Merge scalar double-precision floating-point value from xmm2 and xmm3 registers to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSD</mnem>
			<args>m64,xmm1</args>
			<opc openc="MR">VEX.LIG.F2.0F.WIG 11 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Store scalar double-precision floating-point value from xmm1 register to m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3</args>
			<opc openc="RVM">EVEX.NDS.LIG.F2.0F.W1 10 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Merge scalar double-precision floating-point value from xmm2 and xmm3 registers to xmm1 under writemask k1.</dscrp>
		</ins>
		<!-- EVEX load from memory: destination in ModRM:reg, so T1S-RM. -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVSD</mnem>
			<args>xmm1 {k1}{z},m64</args>
			<opc openc="T1S-RM">EVEX.LIG.F2.0F.W1 10 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Load scalar double-precision floating-point value from m64 to xmm1 register under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3</args>
			<opc openc="MVR">EVEX.NDS.LIG.F2.0F.W1 11 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Merge scalar double-precision floating-point value from xmm2 and xmm3 registers to xmm1 under writemask k1.</dscrp>
		</ins>
		<!-- EVEX store to memory: destination in ModRM:r/m, so T1S-MR. -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVSD</mnem>
			<args>m64 {k1},xmm1</args>
			<opc openc="T1S-MR">EVEX.LIG.F2.0F.W1 11 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Store scalar double-precision floating-point value from xmm1 register to m64 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- RVM: referenced by both the VEX and the EVEX register-merge forms (opcode 10). -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="XM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- MVR: referenced by both the VEX and the EVEX register-merge forms (opcode 11). -->
		<oprndenc openc="MVR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>vvvv(r)</oprnd2>
			<oprnd3>ModRM:reg(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S-RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVSHDUP / VMOVSHDUP: legacy SSE3, VEX (AVX) and EVEX (AVX512F, plus AVX512VL for the
		     128- and 256-bit forms) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands. -->
		<brief>MOVSHDUP--Replicate Single FP Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVSHDUP</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">F3 0F 16 /r</opc>
			<cpuid>
				<flag>SSE3</flag>
			</cpuid>
			<dscrp>Move odd index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSHDUP</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.F3.0F.WIG 16 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move odd index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSHDUP</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.F3.0F.WIG 16 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move odd index single-precision floating-point values from ymm2/mem and duplicate each element into ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSHDUP</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.F3.0F.W0 16 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move odd index single-precision floating-point values from xmm2/m128 and duplicate each element into xmm1 under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSHDUP</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.F3.0F.W0 16 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move odd index single-precision floating-point values from ymm2/m256 and duplicate each element into ymm1 under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSHDUP</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.F3.0F.W0 16 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move odd index single-precision floating-point values from zmm2/m512 and duplicate each element into zmm1 under writemask.</dscrp>
		</ins>
		<!-- RM: referenced by the legacy and VEX forms. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- FVM: referenced by the EVEX forms; same operand placement as RM. -->
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVSLDUP / VMOVSLDUP: legacy SSE3, VEX (AVX) and EVEX (AVX512F, plus AVX512VL for the
		     128- and 256-bit forms) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands;
		     only RM and FVM are defined in this group. -->
		<brief>MOVSLDUP--Replicate Single FP Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVSLDUP</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">F3 0F 12 /r</opc>
			<cpuid>
				<flag>SSE3</flag>
			</cpuid>
			<dscrp>Move even index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSLDUP</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.F3.0F.WIG 12 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move even index single-precision floating-point values from xmm2/mem and duplicate each element into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSLDUP</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.F3.0F.WIG 12 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move even index single-precision floating-point values from ymm2/mem and duplicate each element into ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSLDUP</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.F3.0F.W0 12 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move even index single-precision floating-point values from xmm2/m128 and duplicate each element into xmm1 under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSLDUP</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.F3.0F.W0 12 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move even index single-precision floating-point values from ymm2/m256 and duplicate each element into ymm1 under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSLDUP</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.F3.0F.W0 12 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move even index single-precision floating-point values from zmm2/m512 and duplicate each element into zmm1 under writemask.</dscrp>
		</ins>
		<!-- RM: referenced by the legacy and VEX forms. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- FVM: referenced by the EVEX forms; same operand placement as RM. -->
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVSS / VMOVSS: scalar single-precision move, load and store forms in legacy SSE,
		     VEX (AVX) and EVEX (AVX512F) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands.
		     The EVEX memory forms use T1S-RM (load) and T1S-MR (store); no plain "T1S" table exists here. -->
		<brief>MOVSS--Move or Merge Scalar Single-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVSS</mnem>
			<args>xmm1,xmm2</args>
			<opc openc="RM">F3 0F 10 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Merge scalar single-precision floating-point value from xmm2 to xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVSS</mnem>
			<args>xmm1,m32</args>
			<opc openc="RM">F3 0F 10 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Load scalar single-precision floating-point value from m32 to xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSS</mnem>
			<args>xmm1,xmm2,xmm3</args>
			<opc openc="RVM">VEX.NDS.LIG.F3.0F.WIG 10 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Merge scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSS</mnem>
			<args>xmm1,m32</args>
			<opc openc="XM">VEX.LIG.F3.0F.WIG 10 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Load scalar single-precision floating-point value from m32 to xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVSS</mnem>
			<args>xmm2/m32,xmm1</args>
			<opc openc="MR">F3 0F 11 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move scalar single-precision floating-point value from xmm1 register to xmm2/m32.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSS</mnem>
			<args>xmm1,xmm2,xmm3</args>
			<opc openc="MVR">VEX.NDS.LIG.F3.0F.WIG 11 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSS</mnem>
			<args>m32,xmm1</args>
			<opc openc="MR">VEX.LIG.F3.0F.WIG 11 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move scalar single-precision floating-point value from xmm1 register to m32.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3</args>
			<opc openc="RVM">EVEX.NDS.LIG.F3.0F.W0 10 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register under writemask k1.</dscrp>
		</ins>
		<!-- EVEX load from memory: destination in ModRM:reg, so T1S-RM. -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVSS</mnem>
			<args>xmm1 {k1}{z},m32</args>
			<opc openc="T1S-RM">EVEX.LIG.F3.0F.W0 10 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move scalar single-precision floating-point values from m32 to xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3</args>
			<opc openc="MVR">EVEX.NDS.LIG.F3.0F.W0 11 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move scalar single-precision floating-point value from xmm2 and xmm3 to xmm1 register under writemask k1.</dscrp>
		</ins>
		<!-- EVEX store to memory: destination in ModRM:r/m, so T1S-MR. -->
		<ins x32m="V" x64m="V">
			<mnem>VMOVSS</mnem>
			<args>m32 {k1},xmm1</args>
			<opc openc="T1S-MR">EVEX.LIG.F3.0F.W0 11 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move scalar single-precision floating-point values from xmm1 to m32 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- RVM: referenced by both the VEX and the EVEX register-merge forms (opcode 10). -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="XM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- MVR: referenced by both the VEX and the EVEX register-merge forms (opcode 11). -->
		<oprndenc openc="MVR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>vvvv(r)</oprnd2>
			<oprnd3>ModRM:reg(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S-RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- MOVUPD / VMOVUPD: unaligned packed double-precision load forms (opcode 10) and store
		     forms (opcode 11) in legacy SSE2, VEX (AVX) and EVEX (AVX512F, plus AVX512VL for the
		     128- and 256-bit forms) encodings.
		     Each <opc openc="..."> names the <oprndenc> table below that describes its operands:
		     legacy/VEX use RM (load) and MR (store); EVEX uses FVM-RM (load) and FVM-MR (store). -->
		<brief>MOVUPD--Move Unaligned Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVUPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 10 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point from xmm2/mem to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVUPD</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">66 0F 11 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point from xmm1 to xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.66.0F.WIG 10 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point from xmm2/mem to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPD</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">VEX.128.66.0F.WIG 11 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point from xmm1 to xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPD</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.66.0F.WIG 10 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point from ymm2/mem to ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPD</mnem>
			<args>ymm2/m256,ymm1</args>
			<opc openc="MR">VEX.256.66.0F.WIG 11 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point from ymm1 to ymm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM-RM">EVEX.128.66.0F.W1 10 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPD</mnem>
			<args>xmm2/m128 {k1}{z},xmm1</args>
			<opc openc="FVM-MR">EVEX.128.66.0F.W1 11 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point from xmm1 to xmm2/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM-RM">EVEX.256.66.0F.W1 10 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPD</mnem>
			<args>ymm2/m256 {k1}{z},ymm1</args>
			<opc openc="FVM-MR">EVEX.256.66.0F.W1 11 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point from ymm1 to ymm2/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM-RM">EVEX.512.66.0F.W1 10 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPD</mnem>
			<args>zmm2/m512 {k1}{z},zmm1</args>
			<opc openc="FVM-MR">EVEX.512.66.0F.W1 11 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed double-precision floating-point values from zmm1 to zmm2/m512 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- FVM-RM: EVEX load forms (opcode 10). -->
		<oprndenc openc="FVM-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- FVM-MR: EVEX store forms (opcode 11). -->
		<oprndenc openc="FVM-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MOVUPS / VMOVUPS group.
	     Opcode byte 10 entries move from the r/m operand to the register operand;
	     opcode byte 11 entries move from the register operand to the r/m operand.
	     Lists the legacy SSE encodings, the VEX.128 / VEX.256 encodings (AVX) and the
	     EVEX.128 / 256 / 512 encodings (AVX512F, plus AVX512VL on the 128/256-bit forms).
	     NOTE(review): the EVEX <ins> entries carry openc="FVM", while the operand
	     encodings below are keyed "FVM-RM" and "RVM-MR"; confirm how consumers
	     resolve that key before renaming either side. -->
	<common>
		<brief>MOVUPS--Move Unaligned Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MOVUPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 10 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point from xmm2/mem to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>MOVUPS</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">0F 11 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point from xmm1 to xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.0F.WIG 10 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point from xmm2/mem to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPS</mnem>
			<args>xmm2/m128,xmm1</args>
			<opc openc="MR">VEX.128.0F.WIG 11 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point from xmm1 to xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPS</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.0F.WIG 10 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point from ymm2/mem to ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPS</mnem>
			<args>ymm2/m256,ymm1</args>
			<opc openc="MR">VEX.256.0F.WIG 11 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point from ymm1 to ymm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.0F.W0 10 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.0F.W0 10 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.0F.W0 10 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPS</mnem>
			<args>xmm2/m128 {k1}{z},xmm1</args>
			<opc openc="FVM">EVEX.128.0F.W0 11 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point values from xmm1 to xmm2/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPS</mnem>
			<args>ymm2/m256 {k1}{z},ymm1</args>
			<opc openc="FVM">EVEX.256.0F.W0 11 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point values from ymm1 to ymm2/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMOVUPS</mnem>
			<args>zmm2/m512 {k1}{z},zmm1</args>
			<opc openc="FVM">EVEX.512.0F.W0 11 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move unaligned packed single-precision floating-point values from zmm1 to zmm2/m512 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM-MR">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- PSADBW / VPSADBW group: sum of absolute differences of packed unsigned bytes.
	     Every 8 byte differences are summed into one unsigned word result, so the
	     128-bit forms yield two results, the 256-bit forms four and the 512-bit form eight.
	     Lists the legacy SSE2 form, the VEX.128 (AVX) and VEX.256 (AVX2) forms and the
	     EVEX.128 / 256 / 512 forms (AVX512BW, plus AVX512VL on the 128/256-bit forms).
	     None of the EVEX entries lists a writemask in its args. -->
	<common>
		<brief>PSADBW--Compute Sum of Absolute Differences.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSADBW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F F6 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Computes the absolute differences of the packed unsigned byte integers from xmm2/m128 and xmm1; the 8 low differences and 8 high differences are then summed separately to produce two unsigned word integer results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSADBW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F F6 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Computes the absolute differences of the packed unsigned byte integers from xmm3/m128 and xmm2; the 8 low differences and 8 high differences are then summed separately to produce two unsigned word integer results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSADBW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F F6 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Computes the absolute differences of the packed unsigned byte integers from ymm3/m256 and ymm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSADBW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG F6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Computes the absolute differences of the packed unsigned byte integers from xmm3/m128 and xmm2; then each consecutive 8 differences are summed separately to produce two unsigned word integer results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSADBW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG F6 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Computes the absolute differences of the packed unsigned byte integers from ymm3/m256 and ymm2; then each consecutive 8 differences are summed separately to produce four unsigned word integer results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSADBW</mnem>
			<args>zmm1,zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG F6 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Computes the absolute differences of the packed unsigned byte integers from zmm3/m512 and zmm2; then each consecutive 8 differences are summed separately to produce eight unsigned word integer results.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MULPD / VMULPD group: packed double-precision multiply.
	     Lists the legacy SSE2 form (destructive, first operand is read and written),
	     the VEX.128 / VEX.256 three-operand forms (AVX) and the EVEX.128 / 256 / 512
	     forms (AVX512F, plus AVX512VL on the 128/256-bit forms) with writemask
	     {k1}{z} and an m64bcst memory alternative. Only the 512-bit form carries
	     the {er} suffix in its args. -->
	<common>
		<brief>MULPD--Multiply Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MULPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 59 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values in xmm2/m128 with xmm1 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 59 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values in xmm3/m128 with xmm2 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 59 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values in ymm3/m256 with ymm2 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 59 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values in xmm3/m128/m64bcst with xmm2 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 59 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values in ymm3/m256/m64bcst with ymm2 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst {er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 59 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed double-precision floating-point values in zmm3/m512/m64bcst with zmm2 and store result in zmm1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MULPS / VMULPS group: packed single-precision multiply.
	     Lists the legacy SSE form (destructive, first operand is read and written),
	     the VEX.128 / VEX.256 three-operand forms (AVX) and the EVEX.128 / 256 / 512
	     forms (AVX512F, plus AVX512VL on the 128/256-bit forms) with writemask
	     {k1}{z} and an m32bcst memory alternative. Only the 512-bit form carries
	     the {er} suffix in its args. -->
	<common>
		<brief>MULPS--Multiply Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MULPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 59 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values in xmm2/m128 with xmm1 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 59 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values in xmm3/m128 with xmm2 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F.WIG 59 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values in ymm3/m256 with ymm2 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 59 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values in xmm3/m128/m32bcst with xmm2 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 59 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values in ymm3/m256/m32bcst with ymm2 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst {er}</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 59 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed single-precision floating-point values in zmm3/m512/m32bcst with zmm2 and store result in zmm1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MULSD / VMULSD group: multiply of the low double-precision value.
	     Lists the legacy SSE2 form (opcode F2 0F 59 /r), the VEX three-operand form
	     (AVX) and the EVEX form (AVX512F), whose args add the {k1}{z} and {er}
	     decorations. The openc values used by the <ins> entries (RM, RVM, T1S) each
	     have a matching <oprndenc> entry at the end of the group. -->
	<common>
		<brief>MULSD--Multiply Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MULSD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F2 0F 59 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Multiply the low double-precision floating-point value in xmm2/m64 by low double-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULSD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.NDS.128.F2.0F.WIG 59 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply the low double-precision floating-point value in xmm3/m64 by low double-precision floating-point value in xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64 {er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 59 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply the low double-precision floating-point value in xmm3/m64 by low double-precision floating-point value in xmm2.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- MULSS / VMULSS group: multiply of the low single-precision value.
	     Lists the legacy SSE form (opcode F3 0F 59 /r), the VEX three-operand form
	     (AVX) and the EVEX form (AVX512F), whose args add the {k1}{z} and {er}
	     decorations. The openc values used by the <ins> entries (RM, RVM, T1S) each
	     have a matching <oprndenc> entry at the end of the group. -->
	<common>
		<brief>MULSS--Multiply Scalar Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>MULSS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">F3 0F 59 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Multiply the low single-precision floating-point value in xmm2/m32 by the low single-precision floating-point value in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULSS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.NDS.128.F3.0F.WIG 59 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply the low single-precision floating-point value in xmm3/m32 by the low single-precision floating-point value in xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VMULSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32 {er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 59 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply the low single-precision floating-point value in xmm3/m32 by the low single-precision floating-point value in xmm2.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- ORPD / VORPD group: bitwise OR of packed double-precision values.
	     Lists the legacy SSE2 form, the VEX.128 / VEX.256 three-operand forms (AVX)
	     and the EVEX.128 / 256 / 512 forms, which are flagged AVX512DQ (plus AVX512VL
	     on the 128/256-bit forms) and accept a writemask {k1}{z} and an m64bcst
	     memory alternative. -->
	<common>
		<brief>ORPD--Bitwise Logical OR of Packed Double Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>ORPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 56 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed double-precision floating-point values in xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VORPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F 56 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed double-precision floating-point values in xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VORPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F 56 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed double-precision floating-point values in ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VORPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 56 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed double-precision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VORPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 56 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed double-precision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VORPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 56 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed double-precision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- ORPS / VORPS group: bitwise OR of packed single-precision values.
	     Lists the legacy SSE form, the VEX.128 / VEX.256 three-operand forms (AVX)
	     and the EVEX.128 / 256 / 512 forms, which are flagged AVX512DQ (plus AVX512VL
	     on the 128/256-bit forms) and accept a writemask {k1}{z} and an m32bcst
	     memory alternative. -->
	<common>
		<brief>ORPS--Bitwise Logical OR of Packed Single Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>ORPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 56 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed single-precision floating-point values in xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VORPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F 56 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed single-precision floating-point values in xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VORPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F 56 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed single-precision floating-point values in ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VORPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 56 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed single-precision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VORPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 56 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed single-precision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VORPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 56 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical OR of packed single-precision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- Packed absolute value group (opcode bytes 1C = byte, 1D = word, 1E = doubleword,
	     1F = quadword).
	     Lists PABSB/PABSW/PABSD (SSSE3), their VEX.128 (AVX) and VEX.256 (AVX2) forms,
	     the EVEX byte/word forms (AVX512BW, openc FVM) and the EVEX doubleword/quadword
	     forms (AVX512F, openc FV, with m32bcst / m64bcst memory alternatives).
	     The 128/256-bit EVEX forms additionally list AVX512VL. VPABSQ appears only with
	     EVEX encodings in this group. All three operand encodings write ModRM:reg and
	     read ModRM:r/m. -->
	<common>
		<brief>PABSB/PABSW/PABSD/PABSQ--Packed Absolute Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PABSB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 1C /r</opc>
			<cpuid>
				<flag>SSSE3</flag>
			</cpuid>
			<dscrp>Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PABSW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 1D /r</opc>
			<cpuid>
				<flag>SSSE3</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 16-bit integers in xmm2/m128 and store UNSIGNED result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PABSD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 1E /r</opc>
			<cpuid>
				<flag>SSSE3</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 32-bit integers in xmm2/m128 and store UNSIGNED result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 1C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 1D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 16-bit integers in xmm2/m128 and store UNSIGNED result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 1E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 32-bit integers in xmm2/m128 and store UNSIGNED result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSB</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 1C /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compute the absolute value of bytes in ymm2/m256 and store UNSIGNED result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSW</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 1D /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 16-bit integers in ymm2/m256 and store UNSIGNED result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSD</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 1E /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 32-bit integers in ymm2/m256 and store UNSIGNED result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSB</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.66.0F38.WIG 1C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compute the absolute value of bytes in xmm2/m128 and store UNSIGNED result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSB</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.66.0F38.WIG 1C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compute the absolute value of bytes in ymm2/m256 and store UNSIGNED result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSB</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.66.0F38.WIG 1C /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compute the absolute value of bytes in zmm2/m512 and store UNSIGNED result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSW</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="FVM">EVEX.128.66.0F38.WIG 1D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 16-bit integers in xmm2/m128 and store UNSIGNED result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSW</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="FVM">EVEX.256.66.0F38.WIG 1D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 16-bit integers in ymm2/m256 and store UNSIGNED result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSW</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="FVM">EVEX.512.66.0F38.WIG 1D /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 16-bit integers in zmm2/m512 and store UNSIGNED result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W0 1E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 32-bit integers in xmm2/m128/m32bcst and store UNSIGNED result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W0 1E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 32-bit integers in ymm2/m256/m32bcst and store UNSIGNED result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst</args>
			<opc openc="FV">EVEX.512.66.0F38.W0 1E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 32-bit integers in zmm2/m512/m32bcst and store UNSIGNED result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W1 1F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 64-bit integers in xmm2/m128/m64bcst and store UNSIGNED result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W1 1F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 64-bit integers in ymm2/m256/m64bcst and store UNSIGNED result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPABSQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst</args>
			<opc openc="FV">EVEX.512.66.0F38.W1 1F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compute the absolute value of 64-bit integers in zmm2/m512/m64bcst and store UNSIGNED result in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PACKSSWB/PACKSSDW--Pack with Signed Saturation.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PACKSSWB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 63 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Converts 8 packed signed word integers from xmm1 and from xxm2/m128 into 16 packed signed byte integers in xmm1 using signed saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PACKSSDW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 6B /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed word integers in xmm1 using signed saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKSSWB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F 63 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Converts 8 packed signed word integers from xmm2 and from xmm3/m128 into 16 packed signed byte integers in xmm1 using signed saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKSSDW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F 6B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Converts 4 packed signed doubleword integers from xmm2 and from xmm3/m128 into 8 packed signed word integers in xmm1 using signed saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKSSWB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F 63 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Converts 16 packed signed word integers from ymm2 and from ymm3/m256 into 32 packed signed byte integers in ymm1 using signed saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKSSDW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F 6B /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Converts 8 packed signed doubleword integers from ymm2 and from ymm3/m256 into 16 packed signed word integers in ymm1 using signed saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKSSWB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG 63 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts packed signed word integers from xmm2 and from xmm3/m128 into packed signed byte integers in xmm1 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKSSWB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG 63 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts packed signed word integers from ymm2 and from ymm3/m256 into packed signed byte integers in ymm1 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKSSWB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG 63 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts packed signed word integers from zmm2 and from zmm3/m512 into packed signed byte integers in zmm1 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKSSDW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 6B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts packed signed doubleword integers from xmm2 and from xmm3/m128/m32bcst into packed signed word integers in xmm1 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKSSDW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 6B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts packed signed doubleword integers from ymm2 and from ymm3/m256/m32bcst into packed signed word integers in ymm1 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKSSDW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 6B /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts packed signed doubleword integers from zmm2 and from zmm3/m512/m32bcst into packed signed word integers in zmm1 using signed saturation under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<!-- Operand slots for the RM encoding key: operand 1 in ModRM:reg marked (r,w), operand 2 in ModRM:r/m marked (r); operands 3 and 4 are NA. -->
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<!-- Operand slots for the RVM encoding key: operand 1 in ModRM:reg marked (w), operand 2 in VEX.vvvv marked (r), operand 3 in ModRM:r/m marked (r); operand 4 is NA. -->
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<!-- Operand slots for the FVM encoding key: operand 1 in ModRM:reg marked (w), operand 2 in EVEX.vvvv marked (r), operand 3 in ModRM:r/m marked (r); operand 4 is NA. -->
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<!-- Operand slots for the FV encoding key: operand 1 in ModRM:reg marked (w), operand 2 in EVEX.vvvv marked (r), operand 3 in ModRM:r/m marked (r); operand 4 is NA. -->
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  PACKUSDW / VPACKUSDW group (opcode byte 2B in the 0F 38 map).
		  Entries, in order: one legacy form (SSE4_1), two VEX forms (AVX for
		  128-bit, AVX2 for 256-bit) and three EVEX masked forms (AVX512BW, with
		  AVX512VL also listed for the 128-bit and 256-bit widths).
		  The openc attribute of each <opc> names one of the <oprndenc> entries
		  at the end of this group (RM, RVM or FV).
		  Masked destinations are written "reg {k1}{z}" with a space before the
		  mask, the spelling used by the other masked entries of this file.
		-->
		<brief>PACKUSDW--Pack with Unsigned Saturation.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PACKUSDW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 2B /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Convert 4 packed signed doubleword integers from xmm1 and 4 packed signed doubleword integers from xmm2/m128 into 8 packed unsigned word integers in xmm1 using unsigned saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKUSDW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38 2B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Convert 4 packed signed doubleword integers from xmm2 and 4 packed signed doubleword integers from xmm3/m128 into 8 packed unsigned word integers in xmm1 using unsigned saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKUSDW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38 2B /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Convert 8 packed signed doubleword integers from ymm2 and 8 packed signed doubleword integers from ymm3/m256 into 16 packed unsigned word integers in ymm1 using unsigned saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKUSDW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 2B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Convert packed signed doubleword integers from xmm2 and packed signed doubleword integers from xmm3/m128/m32bcst into packed unsigned word integers in xmm1 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKUSDW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 2B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Convert packed signed doubleword integers from ymm2 and packed signed doubleword integers from ymm3/m256/m32bcst into packed unsigned word integers in ymm1 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKUSDW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 2B /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Convert packed signed doubleword integers from zmm2 and packed signed doubleword integers from zmm3/m512/m32bcst into packed unsigned word integers in zmm1 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  PACKUSWB / VPACKUSWB group (opcode byte 67 in the 0F map).
		  Entries, in order: one legacy form (SSE2), two VEX forms (AVX for
		  128-bit, AVX2 for 256-bit) and three EVEX masked forms (AVX512BW, with
		  AVX512VL also listed for the 128-bit and 256-bit widths).
		  The openc attribute of each <opc> names one of the <oprndenc> entries
		  at the end of this group (RM, RVM or FVM).
		  Masked destinations are written "reg {k1}{z}" with a space before the
		  mask, the spelling used by the other masked entries of this file.
		-->
		<brief>PACKUSWB--Pack with Unsigned Saturation.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PACKUSWB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 67 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Converts 8 signed word integers from xmm1 and 8 signed word integers from xmm2/m128 into 16 unsigned byte integers in xmm1 using unsigned saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKUSWB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F 67 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Converts 8 signed word integers from xmm2 and 8 signed word integers from xmm3/m128 into 16 unsigned byte integers in xmm1 using unsigned saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKUSWB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F 67 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Converts 16 signed word integers from ymm2 and 16 signed word integers from ymm3/m256 into 32 unsigned byte integers in ymm1 using unsigned saturation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKUSWB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG 67 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts signed word integers from xmm2 and signed word integers from xmm3/m128 into unsigned byte integers in xmm1 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKUSWB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG 67 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts signed word integers from ymm2 and signed word integers from ymm3/m256 into unsigned byte integers in ymm1 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPACKUSWB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG 67 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts signed word integers from zmm2 and signed word integers from zmm3/m512 into unsigned byte integers in zmm1 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  PADDB / PADDW / PADDD / PADDQ group (opcode bytes FC, FD, FE and D4 in
		  the 0F map).
		  Entries, in order: four legacy forms (SSE2), four VEX 128-bit forms
		  (AVX), four VEX 256-bit forms (AVX2), then EVEX masked forms at 128,
		  256 and 512 bits.
		  Among the EVEX forms, the byte and word variants list AVX512BW and use
		  the FVM key with WIG; the doubleword and quadword variants list AVX512F,
		  use the FV key with W0 (m32bcst) or W1 (m64bcst). AVX512VL is also
		  listed for the 128-bit and 256-bit widths.
		  The openc attribute of each <opc> names one of the <oprndenc> entries
		  at the end of this group (RM, RVM, FVM or FV).
		-->
		<brief>PADDB/PADDW/PADDD/PADDQ--Add Packed Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PADDB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F FC /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Add packed byte integers from xmm2/m128 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PADDW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F FD /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Add packed word integers from xmm2/m128 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PADDD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F FE /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Add packed doubleword integers from xmm2/m128 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PADDQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F D4 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Add packed quadword integers from xmm2/m128 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG FC /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed byte integers from xmm2, and xmm3/m128 and store in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG FD /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed word integers from xmm2, xmm3/m128 and store in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG FE /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed doubleword integers from xmm2, xmm3/m128 and store in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG D4 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed quadword integers from xmm2, xmm3/m128 and store in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG FC /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Add packed byte integers from ymm2, and ymm3/m256 and store in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG FD /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Add packed word integers from ymm2, ymm3/m256 and store in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG FE /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Add packed doubleword integers from ymm2, ymm3/m256 and store in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG D4 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Add packed quadword integers from ymm2, ymm3/m256 and store in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG FC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed byte integers from xmm2, and xmm3/m128 and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG FD /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed word integers from xmm2, and xmm3/m128 and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 FE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed doubleword integers from xmm2, and xmm3/m128/m32bcst and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 D4 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed quadword integers from xmm2, and xmm3/m128/m64bcst and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG FC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed byte integers from ymm2, and ymm3/m256 and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG FD /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed word integers from ymm2, and ymm3/m256 and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 FE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed doubleword integers from ymm2, ymm3/m256/m32bcst and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 D4 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed quadword integers from ymm2, ymm3/m256/m64bcst and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG FC /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed byte integers from zmm2, and zmm3/m512 and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG FD /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed word integers from zmm2, and zmm3/m512 and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 FE /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed doubleword integers from zmm2, zmm3/m512/m32bcst and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 D4 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Add packed quadword integers from zmm2, zmm3/m512/m64bcst and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  PADDSB / PADDSW group (opcode bytes EC and ED in the 0F map).
		  Entries, in order: two legacy forms (SSE2), two VEX 128-bit forms (AVX),
		  two VEX 256-bit forms (AVX2), then three EVEX masked forms per mnemonic
		  (AVX512BW, with AVX512VL also listed for the 128-bit and 256-bit widths).
		  The openc attribute of each <opc> names one of the <oprndenc> entries
		  at the end of this group (RM, RVM or FVM).
		  Every opcode string ends in /r, including the VEX ones: the RVM entry
		  places operands in ModRM:reg and ModRM:r/m, so a ModRM byte is always
		  part of the encoding.
		-->
		<brief>PADDSB/PADDSW--Add Packed Signed Integers with Signed Saturation.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PADDSB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F EC /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Add packed signed byte integers from xmm2/m128 and xmm1 and saturate the results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PADDSW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F ED /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Add packed signed word integers from xmm2/m128 and xmm1 and saturate the results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDSB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F EC /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed signed byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDSW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F ED /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed signed word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDSB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F EC /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Add packed signed byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDSW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F ED /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Add packed signed word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDSB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG EC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed signed byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDSB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG EC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed signed byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDSB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG EC /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed signed byte integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDSW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG ED /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed signed word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDSW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG ED /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed signed word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDSW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG ED /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed signed word integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  PADDUSB / PADDUSW group (opcode bytes DC and DD in the 0F map).
		  Entries, in order: two legacy forms (SSE2), two VEX 128-bit forms (AVX),
		  two VEX 256-bit forms (AVX2), then three EVEX masked forms per mnemonic
		  (AVX512BW, with AVX512VL also listed for the 128-bit and 256-bit widths).
		  The openc attribute of each <opc> names one of the <oprndenc> entries
		  at the end of this group (RM, RVM or FVM).
		  Every opcode string ends in /r, including the VEX ones: the RVM entry
		  places operands in ModRM:reg and ModRM:r/m, so a ModRM byte is always
		  part of the encoding.
		-->
		<brief>PADDUSB/PADDUSW--Add Packed Unsigned Integers with Unsigned Saturation.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PADDUSB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F DC /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Add packed unsigned byte integers from xmm2/m128 and xmm1 and saturate the results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PADDUSW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F DD /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Add packed unsigned word integers from xmm2/m128 and xmm1 and saturate the results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDUSB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F DC /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed unsigned byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDUSW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F DD /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Add packed unsigned word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDUSB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F DC /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Add packed unsigned byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDUSW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F DD /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Add packed unsigned word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDUSB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG DC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed unsigned byte integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDUSB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG DC /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed unsigned byte integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDUSB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG DC /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed unsigned byte integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDUSW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG DD /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed unsigned word integers from xmm2, and xmm3/m128 and store the saturated results in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDUSW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG DD /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed unsigned word integers from ymm2, and ymm3/m256 and store the saturated results in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPADDUSW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG DD /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add packed unsigned word integers from zmm2, and zmm3/m512 and store the saturated results in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  PALIGNR / VPALIGNR group (opcode byte 0F in the 0F 3A map, followed by
		  /r and an immediate byte ib).
		  Entries, in order: one legacy form (SSSE3), two VEX forms (AVX for
		  128-bit, AVX2 for 256-bit) and three EVEX masked forms (AVX512BW, with
		  AVX512VL also listed for the 128-bit and 256-bit widths).
		  The openc attribute of each <opc> names one of the <oprndenc> entries
		  at the end of this group (RM, RVM or FVM).
		  NOTE(review): every form takes imm8 as its last argument and carries ib
		  in its opcode string, yet none of the <oprndenc> entries below has a
		  slot for the immediate (RM lists operand 3 as NA; RVM and FVM list
		  operand 4 as NA). Confirm against the reference document whether the
		  immediate should be recorded there.
		-->
		<brief>PALIGNR--Byte Align.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PALIGNR</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RM">66 0F 3A 0F /r ib</opc>
			<cpuid>
				<flag>SSSE3</flag>
			</cpuid>
			<dscrp>Concatenate destination and source operands, extract byte aligned result shifted to the right by constant value in imm8 and result is stored in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPALIGNR</mnem>
			<args>xmm1,xmm2,xmm3/m128,imm8</args>
			<opc openc="RVM">VEX.NDS.128.66.0F3A 0F /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Concatenate xmm2 and xmm3/m128 into a 32-byte intermediate result, extract byte aligned result shifted to the right by constant value in imm8 and result is stored in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPALIGNR</mnem>
			<args>ymm1,ymm2,ymm3/m256,imm8</args>
			<opc openc="RVM">VEX.NDS.256.66.0F3A 0F /r ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Concatenate pairs of 16 bytes in ymm2 and ymm3/m256 into 32-byte intermediate result, extract byte-aligned, 16-byte result shifted to the right by constant values in imm8 from each intermediate result, and two 16-byte results are stored in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPALIGNR</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128,imm8</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F3A.WIG 0F /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Concatenate xmm2 and xmm3/m128 into a 32-byte intermediate result, extract byte aligned result shifted to the right by constant value in imm8 and result is stored in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPALIGNR</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256,imm8</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F3A.WIG 0F /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Concatenate pairs of 16 bytes in ymm2 and ymm3/m256 into 32-byte intermediate result, extract byte-aligned, 16-byte result shifted to the right by constant values in imm8 from each intermediate result, and two 16-byte results are stored in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPALIGNR</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512,imm8</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F3A.WIG 0F /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Concatenate pairs of 16 bytes in zmm2 and zmm3/m512 into 32-byte intermediate result, extract byte-aligned, 16-byte result shifted to the right by constant values in imm8 from each intermediate result, and four 16-byte results are stored in zmm1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  PAND / VPAND / VPANDD / VPANDQ group (opcode byte DB in the 0F map).
		  Entries, in order: one legacy form (SSE2), two VEX forms (AVX for
		  128-bit, AVX2 for 256-bit), three EVEX VPANDD forms (W0, m32bcst) and
		  three EVEX VPANDQ forms (W1, m64bcst). The EVEX forms list AVX512F, with
		  AVX512VL also listed for the 128-bit and 256-bit widths.
		  The openc attribute of each <opc> names one of the <oprndenc> entries
		  at the end of this group (RM, RVM or FV).
		-->
		<brief>PAND--Logical AND.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PAND</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F DB /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Bitwise AND of xmm2/m128 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAND</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG DB /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Bitwise AND of xmm2, and xmm3/m128 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAND</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG DB /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Bitwise AND of ymm2, and ymm3/m256 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 DB /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and store result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 DB /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and store result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 DB /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and store result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 DB /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed quadword integers in xmm2 and xmm3/m128/m64bcst and store result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 DB /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed quadword integers in ymm2 and ymm3/m256/m64bcst and store result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 DB /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed quadword integers in zmm2 and zmm3/m512/m64bcst and store result in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  PANDN / VPANDN / VPANDND / VPANDNQ group (opcode byte DF in the 0F map).
		  Entries, in order: one legacy form (SSE2), two VEX forms (AVX for
		  128-bit, AVX2 for 256-bit), three EVEX VPANDND forms (W0, m32bcst) and
		  three EVEX VPANDNQ forms (W1, m64bcst). The EVEX forms list AVX512F,
		  with AVX512VL also listed for the 128-bit and 256-bit widths.
		  The openc attribute of each <opc> names one of the <oprndenc> entries
		  at the end of this group (RM, RVM or FV).
		-->
		<brief>PANDN--Logical AND NOT.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PANDN</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F DF /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT of xmm2/m128 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDN</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG DF /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT of xmm2, and xmm3/m128 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDN</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG DF /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT of ymm2, and ymm3/m256 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDND</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 DF /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and store result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDND</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 DF /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and store result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDND</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 DF /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and store result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDNQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 DF /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT of packed quadword integers in xmm2 and xmm3/m128/m64bcst and store result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDNQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 DF /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT of packed quadword integers in ymm2 and ymm3/m256/m64bcst and store result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPANDNQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 DF /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT of packed quadword integers in zmm2 and zmm3/m512/m64bcst and store result in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Packed unsigned average family (byte: opcode E0, word: opcode E3). Forms listed below:
		     legacy SSE2 PAVGB/PAVGW (openc RM);
		     VEX-encoded VPAVGB/VPAVGW for xmm (AVX) and ymm (AVX2) (openc RVM);
		     EVEX-encoded VPAVGB/VPAVGW for xmm, ymm and zmm with writemask k1 (openc FVM).
		     The 128-bit and 256-bit EVEX forms list AVX512VL in addition to AVX512BW.
		     Every form carries a ModRM byte, so every opc string ends with the /r marker. -->
		<brief>PAVGB/PAVGW--Average Packed Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PAVGB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F E0 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PAVGW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F E3 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAVGB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG E0 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Average packed unsigned byte integers from xmm2, and xmm3/m128 with rounding and store to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAVGW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG E3 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Average packed unsigned word integers from xmm2, xmm3/m128 with rounding to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAVGB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG E0 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Average packed unsigned byte integers from ymm2, and ymm3/m256 with rounding and store to ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAVGW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG E3 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Average packed unsigned word integers from ymm2, ymm3/m256 with rounding to ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAVGB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG E0 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Average packed unsigned byte integers from xmm2, and xmm3/m128 with rounding and store to xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAVGB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG E0 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Average packed unsigned byte integers from ymm2, and ymm3/m256 with rounding and store to ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAVGB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG E0 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Average packed unsigned byte integers from zmm2, and zmm3/m512 with rounding and store to zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAVGW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG E3 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Average packed unsigned word integers from xmm2, xmm3/m128 with rounding to xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAVGW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG E3 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Average packed unsigned word integers from ymm2, ymm3/m256 with rounding to ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPAVGW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG E3 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Average packed unsigned word integers from zmm2, zmm3/m512 with rounding to zmm1 under writemask k1.</dscrp>
		</ins>
		<!-- Operand-encoding tables. Each openc key below corresponds to the openc attribute
		     on the opc elements above. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Mask-to-vector broadcast family. Forms listed below:
		     VPBROADCASTMB2Q (EVEX W1, opcode 2A) and VPBROADCASTMW2D (EVEX W0, opcode 3A),
		     each with an xmm, ymm or zmm destination and mask register k1 as the source.
		     The 128-bit and 256-bit forms list AVX512VL in addition to AVX512CD.
		     All forms share the single RM operand encoding defined at the end of this entry. -->
		<brief>VPBROADCASTM--Broadcast Mask to Vector Register.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTMB2Q</mnem>
			<args>xmm1,k1</args>
			<opc openc="RM">EVEX.128.F3.0F38.W1 2A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Broadcast low byte value in k1 to two locations in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTMB2Q</mnem>
			<args>ymm1,k1</args>
			<opc openc="RM">EVEX.256.F3.0F38.W1 2A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Broadcast low byte value in k1 to four locations in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTMB2Q</mnem>
			<args>zmm1,k1</args>
			<opc openc="RM">EVEX.512.F3.0F38.W1 2A /r</opc>
			<cpuid>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Broadcast low byte value in k1 to eight locations in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTMW2D</mnem>
			<args>xmm1,k1</args>
			<opc openc="RM">EVEX.128.F3.0F38.W0 3A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Broadcast low word value in k1 to four locations in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTMW2D</mnem>
			<args>ymm1,k1</args>
			<opc openc="RM">EVEX.256.F3.0F38.W0 3A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Broadcast low word value in k1 to eight locations in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPBROADCASTMW2D</mnem>
			<args>zmm1,k1</args>
			<opc openc="RM">EVEX.512.F3.0F38.W0 3A /r</opc>
			<cpuid>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Broadcast low word value in k1 to sixteen locations in zmm1.</dscrp>
		</ins>
		<!-- RM encoding for this entry: operand 1 is write-only in ModRM:reg and operand 2 is
		     read from ModRM:r/m; operands 3 and 4 are unused. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Packed integer compare-for-equality family. Forms listed below:
		     legacy PCMPEQB/W/D (SSE2) and PCMPEQQ (SSE4_1) (openc RM);
		     VEX-encoded VPCMPEQB/W/D/Q for xmm (AVX) and ymm (AVX2) (openc RVM);
		     EVEX-encoded VPCMPEQD/VPCMPEQQ with broadcast memory operands (AVX512F, openc FV);
		     EVEX-encoded VPCMPEQB/VPCMPEQW (AVX512BW, openc FVM).
		     The EVEX forms write mask register k1 under writemask k2; the 128-bit and 256-bit
		     EVEX forms additionally list AVX512VL.
		     Register/memory operands in args are written without inner whitespace (xmm3/m128). -->
		<brief>PCMPEQB/PCMPEQW/PCMPEQD/PCMPEQQ--Compare Packed Integers for Equality.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PCMPEQB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 74 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed bytes in xmm2/m128 and xmm1 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PCMPEQW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 75 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed words in xmm2/m128 and xmm1 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PCMPEQD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 76 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed doublewords in xmm2/m128 and xmm1 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PCMPEQQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 29 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Compare packed quadwords in xmm2/m128 and xmm1 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 74 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed bytes in xmm3/m128 and xmm2 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 75 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed words in xmm3/m128 and xmm2 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 76 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed doublewords in xmm3/m128 and xmm2 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.WIG 29 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed quadwords in xmm3/m128 and xmm2 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 74 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed bytes in ymm3/m256 and ymm2 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 75 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed words in ymm3/m256 and ymm2 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 76 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed doublewords in ymm3/m256 and ymm2 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.WIG 29 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed quadwords in ymm3/m256 and ymm2 for equality.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQD</mnem>
			<args>k1 {k2},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 76 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Equal between int32 vector xmm2 and int32 vector xmm3/m128/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQD</mnem>
			<args>k1 {k2},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 76 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Equal between int32 vector ymm2 and int32 vector ymm3/m256/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQD</mnem>
			<args>k1 {k2},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 76 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Equal between int32 vectors in zmm2 and zmm3/m512/m32bcst, and set destination k1 according to the comparison results under writemask k2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQQ</mnem>
			<args>k1 {k2},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 29 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Equal between int64 vector xmm2 and int64 vector xmm3/m128/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQQ</mnem>
			<args>k1 {k2},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 29 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Equal between int64 vector ymm2 and int64 vector ymm3/m256/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQQ</mnem>
			<args>k1 {k2},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 29 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Equal between int64 vector zmm2 and int64 vector zmm3/m512/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQB</mnem>
			<args>k1 {k2},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG 74 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed bytes in xmm3/m128 and xmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQB</mnem>
			<args>k1 {k2},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG 74 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed bytes in ymm3/m256 and ymm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQB</mnem>
			<args>k1 {k2},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG 74 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed bytes in zmm3/m512 and zmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQW</mnem>
			<args>k1 {k2},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG 75 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed words in xmm3/m128 and xmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQW</mnem>
			<args>k1 {k2},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG 75 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed words in ymm3/m256 and ymm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPEQW</mnem>
			<args>k1 {k2},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG 75 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed words in zmm3/m512 and zmm2 for equality and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<!-- Operand-encoding tables. Each openc key below corresponds to the openc attribute
		     on the opc elements above. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Packed signed integer compare-for-greater-than family. Forms listed below:
		     legacy PCMPGTB/W/D (SSE2) and PCMPGTQ (SSE4_2) (openc RM);
		     VEX-encoded VPCMPGTB/W/D/Q for xmm (AVX) and ymm (AVX2) (openc RVM);
		     EVEX-encoded VPCMPGTD/VPCMPGTQ with broadcast memory operands (AVX512F, openc FV);
		     EVEX-encoded VPCMPGTB/VPCMPGTW (AVX512BW, openc FVM).
		     The EVEX forms write mask register k1 under writemask k2; the 128-bit and 256-bit
		     EVEX forms additionally list AVX512VL. -->
		<brief>PCMPGTB/PCMPGTW/PCMPGTD/PCMPGTQ--Compare Packed Integers for Greater Than.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PCMPGTB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 64 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in xmm1 and xmm2/m128 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PCMPGTW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 65 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in xmm1 and xmm2/m128 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PCMPGTD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 66 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed signed doubleword integers in xmm1 and xmm2/m128 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PCMPGTQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 37 /r</opc>
			<cpuid>
				<flag>SSE4_2</flag>
			</cpuid>
			<dscrp>Compare packed qwords in xmm2/m128 and xmm1 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 64 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in xmm2 and xmm3/m128 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 65 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in xmm2 and xmm3/m128 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 66 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed signed doubleword integers in xmm2 and xmm3/m128 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.WIG 37 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed signed qwords in xmm2 and xmm3/m128 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 64 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in ymm2 and ymm3/m256 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 65 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in ymm2 and ymm3/m256 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 66 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed signed doubleword integers in ymm2 and ymm3/m256 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.WIG 37 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed signed qwords in ymm2 and ymm3/m256 for greater than.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTD</mnem>
			<args>k1 {k2},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 66 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Greater between int32 vector xmm2 and int32 vector xmm3/m128/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTD</mnem>
			<args>k1 {k2},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 66 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Greater between int32 vector ymm2 and int32 vector ymm3/m256/m32bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTD</mnem>
			<args>k1 {k2},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 66 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Greater between int32 elements in zmm2 and zmm3/m512/m32bcst, and set destination k1 according to the comparison results under writemask k2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTQ</mnem>
			<args>k1 {k2},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 37 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Greater between int64 vector xmm2 and int64 vector xmm3/m128/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTQ</mnem>
			<args>k1 {k2},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 37 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Greater between int64 vector ymm2 and int64 vector ymm3/m256/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTQ</mnem>
			<args>k1 {k2},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 37 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare Greater between int64 vector zmm2 and int64 vector zmm3/m512/m64bcst, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTB</mnem>
			<args>k1 {k2},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG 64 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in xmm2 and xmm3/m128 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTB</mnem>
			<args>k1 {k2},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG 64 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in ymm2 and ymm3/m256 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTB</mnem>
			<args>k1 {k2},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG 64 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in zmm2 and zmm3/m512 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTW</mnem>
			<args>k1 {k2},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG 65 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in xmm2 and xmm3/m128 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTW</mnem>
			<args>k1 {k2},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG 65 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in ymm2 and ymm3/m256 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPGTW</mnem>
			<args>k1 {k2},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG 65 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in zmm2 and zmm3/m512 for greater than, and set vector mask k1 to reflect the zero/nonzero status of each element of the result, under writemask.</dscrp>
		</ins>
		<!-- Operand-encoding tables. Each openc key below corresponds to the openc attribute
		     on the opc elements above. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Byte compare-into-mask family. Forms listed below:
		     VPCMPB (signed, opcode 3F) and VPCMPUB (unsigned, opcode 3E), each for xmm, ymm and zmm
		     sources. The result is left in mask register k1 under writemask k2, and bits 2:0 of
		     imm8 select the comparison predicate (per the descriptions below).
		     The 128-bit and 256-bit forms list AVX512VL in addition to AVX512BW.
		     Every form takes imm8 as its fourth argument (ib suffix in opc), so the FVM operand
		     table records Imm8 as operand 4 and names the vvvv field with its EVEX prefix. -->
		<brief>VPCMPB/VPCMPUB--Compare Packed Byte Values Into Mask.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPB</mnem>
			<args>k1 {k2},xmm2,xmm3/m128,imm8</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F3A.W0 3F /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte values in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPB</mnem>
			<args>k1 {k2},ymm2,ymm3/m256,imm8</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F3A.W0 3F /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte values in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPB</mnem>
			<args>k1 {k2},zmm2,zmm3/m512,imm8</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F3A.W0 3F /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte values in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUB</mnem>
			<args>k1 {k2},xmm2,xmm3/m128,imm8</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F3A.W0 3E /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte values in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUB</mnem>
			<args>k1 {k2},ymm2,ymm3/m256,imm8</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F3A.W0 3E /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte values in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUB</mnem>
			<args>k1 {k2},zmm2,zmm3/m512,imm8</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F3A.W0 3E /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte values in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Doubleword compare-into-mask family. Forms listed below:
		     VPCMPD (signed, opcode 1F) and VPCMPUD (unsigned, opcode 1E), each for xmm, ymm and zmm
		     sources with an m32bcst memory alternative. The result goes to mask register k1 under
		     writemask k2, and bits 2:0 of imm8 select the comparison predicate (per the
		     descriptions below).
		     The 128-bit and 256-bit forms list AVX512VL in addition to AVX512F.
		     All forms share the single FV operand encoding defined at the end of this entry,
		     which records Imm8 as operand 4. -->
		<brief>VPCMPD/VPCMPUD--Compare Packed Integer Values into Mask.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPD</mnem>
			<args>k1 {k2},xmm2,xmm3/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F3A.W0 1F /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed doubleword integer values in xmm3/m128/m32bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPD</mnem>
			<args>k1 {k2},ymm2,ymm3/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W0 1F /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed doubleword integer values in ymm3/m256/m32bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPD</mnem>
			<args>k1 {k2},zmm2,zmm3/m512/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W0 1F /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed doubleword integer values in zmm2 and zmm3/m512/m32bcst using bits 2:0 of imm8 as a comparison predicate. The comparison results are written to the destination k1 under writemask k2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUD</mnem>
			<args>k1 {k2},xmm2,xmm3/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F3A.W0 1E /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned doubleword integer values in xmm3/m128/m32bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUD</mnem>
			<args>k1 {k2},ymm2,ymm3/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W0 1E /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned doubleword integer values in ymm3/m256/m32bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUD</mnem>
			<args>k1 {k2},zmm2,zmm3/m512/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W0 1E /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned doubleword integer values in zmm2 and zmm3/m512/m32bcst using bits 2:0 of imm8 as a comparison predicate. The comparison results are written to the destination k1 under writemask k2.</dscrp>
		</ins>
		<!-- FV encoding for this entry: operand 1 is write-only in ModRM:reg, operand 2 is read
		     from EVEX.vvvv, operand 3 is read from ModRM:r/m, and operand 4 is the immediate. -->
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Quadword compare-into-mask group: VPCMPQ (signed, opcode 1F) and VPCMPUQ (unsigned, opcode 1E), each listed at 128/256/512-bit EVEX lengths with W1. The 128/256-bit forms list AVX512VL in addition to AVX512F. All six forms reference the single FV operand encoding at the end of this group, whose fourth operand is the imm8 predicate. -->
		<brief>VPCMPQ/VPCMPUQ--Compare Packed Integer Values into Mask.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPQ</mnem>
			<args>k1 {k2},xmm2,xmm3/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F3A.W1 1F /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed quadword integer values in xmm3/m128/m64bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPQ</mnem>
			<args>k1 {k2},ymm2,ymm3/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W1 1F /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed quadword integer values in ymm3/m256/m64bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPQ</mnem>
			<args>k1 {k2},zmm2,zmm3/m512/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W1 1F /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed quadword integer values in zmm3/m512/m64bcst and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUQ</mnem>
			<args>k1 {k2},xmm2,xmm3/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F3A.W1 1E /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned quadword integer values in xmm3/m128/m64bcst and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUQ</mnem>
			<args>k1 {k2},ymm2,ymm3/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W1 1E /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned quadword integer values in ymm3/m256/m64bcst and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUQ</mnem>
			<args>k1 {k2},zmm2,zmm3/m512/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W1 1E /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned quadword integer values in zmm3/m512/m64bcst and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Word compare-into-mask group: VPCMPW (signed, opcode 3F) and VPCMPUW (unsigned, opcode 3E), each listed at 128/256/512-bit EVEX lengths with W1. The 128/256-bit forms list AVX512VL in addition to AVX512BW. All six forms reference the FVM operand encoding at the end of this group. Every form takes a trailing imm8 predicate (ib in the opcode), so the encoding lists Imm8 as operand 4 and names the EVEX.vvvv field explicitly for operand 2. -->
		<brief>VPCMPW/VPCMPUW--Compare Packed Word Values Into Mask.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPW</mnem>
			<args>k1 {k2},xmm2,xmm3/m128,imm8</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F3A.W1 3F /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPW</mnem>
			<args>k1 {k2},ymm2,ymm3/m256,imm8</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F3A.W1 3F /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPW</mnem>
			<args>k1 {k2},zmm2,zmm3/m512,imm8</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F3A.W1 3F /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUW</mnem>
			<args>k1 {k2},xmm2,xmm3/m128,imm8</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F3A.W1 3E /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in xmm3/m128 and xmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUW</mnem>
			<args>k1 {k2},ymm2,ymm3/m256,imm8</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F3A.W1 3E /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in ymm3/m256 and ymm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCMPUW</mnem>
			<args>k1 {k2},zmm2,zmm3/m512,imm8</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F3A.W1 3E /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in zmm3/m512 and zmm2 using bits 2:0 of imm8 as a comparison predicate with writemask k2 and leave the result in mask register k1.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VPCOMPRESSD: three EVEX forms (128/256/512-bit, W0, opcode 8B). The 128/256-bit forms list AVX512VL in addition to AVX512F. All forms reference the T1S operand encoding below, in which the destination is ModRM:r/m (register or memory) and the source is ModRM:reg. -->
		<brief>VPCOMPRESSD--Store Sparse Packed Doubleword Integer Values into Dense Memory/Register.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPCOMPRESSD</mnem>
			<args>xmm1/m128 {k1}{z},xmm2</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 8B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed doubleword integer values from xmm2 to xmm1/m128 using controlmask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCOMPRESSD</mnem>
			<args>ymm1/m256 {k1}{z},ymm2</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 8B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed doubleword integer values from ymm2 to ymm1/m256 using controlmask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCOMPRESSD</mnem>
			<args>zmm1/m512 {k1}{z},zmm2</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 8B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed doubleword integer values from zmm2 to zmm1/m512 using controlmask k1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VPCOMPRESSQ: three EVEX forms (128/256/512-bit, W1, opcode 8B). The 128/256-bit forms list AVX512VL in addition to AVX512F. All forms reference the T1S operand encoding below, in which the destination is ModRM:r/m (register or memory) and the source is ModRM:reg. -->
		<brief>VPCOMPRESSQ--Store Sparse Packed Quadword Integer Values into Dense Memory/Register.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPCOMPRESSQ</mnem>
			<args>xmm1/m128 {k1}{z},xmm2</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 8B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed quadword integer values from xmm2 to xmm1/m128 using controlmask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCOMPRESSQ</mnem>
			<args>ymm1/m256 {k1}{z},ymm2</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 8B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed quadword integer values from ymm2 to ymm1/m256 using controlmask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCOMPRESSQ</mnem>
			<args>zmm1/m512 {k1}{z},zmm2</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 8B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compress packed quadword integer values from zmm2 to zmm1/m512 using controlmask k1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Conflict-detection group: VPCONFLICTD (W0) and VPCONFLICTQ (W1) share opcode C4 and are listed at 128/256/512-bit EVEX lengths. Every form lists AVX512CD; the 128/256-bit forms also list AVX512VL. All forms reference the two-operand FV encoding below (destination in ModRM:reg, source in ModRM:r/m). -->
		<!-- NOTE(review): the trailing "into Dense Memory/ Register" in the brief reads like text carried over from the VPCOMPRESS titles; the forms below only name a vector register destination. Confirm against the reference before changing. -->
		<brief>VPCONFLICTD/Q--Detect Conflicts Within a Vector of Packed Dword/Qword Values into Dense Memory/ Register.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPCONFLICTD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W0 C4 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Detect duplicate double-word values in xmm2/m128/m32bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCONFLICTD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W0 C4 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Detect duplicate double-word values in ymm2/m256/m32bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCONFLICTD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst</args>
			<opc openc="FV">EVEX.512.66.0F38.W0 C4 /r</opc>
			<cpuid>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Detect duplicate double-word values in zmm2/m512/m32bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCONFLICTQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W1 C4 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Detect duplicate quad-word values in xmm2/m128/m64bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCONFLICTQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W1 C4 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Detect duplicate quad-word values in ymm2/m256/m64bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPCONFLICTQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst</args>
			<opc openc="FV">EVEX.512.66.0F38.W1 C4 /r</opc>
			<cpuid>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Detect duplicate quad-word values in zmm2/m512/m64bcst using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VPERMB: three EVEX forms (128/256/512-bit, W0, opcode 8D). Every form lists AVX512VBMI; the 128/256-bit forms also list AVX512VL. All forms reference the three-operand FVM encoding below (destination in ModRM:reg, first source in EVEX.vvvv, second source in ModRM:r/m). -->
		<brief>VPERMB--Permute Packed Bytes Elements.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.W0 8D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512VBMI</flag>
			</cpuid>
			<dscrp>Permute bytes in xmm3/m128 using byte indexes in xmm2 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.W0 8D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512VBMI</flag>
			</cpuid>
			<dscrp>Permute bytes in ymm3/m256 using byte indexes in ymm2 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.W0 8D /r</opc>
			<cpuid>
				<flag>AVX512VBMI</flag>
			</cpuid>
			<dscrp>Permute bytes in zmm3/m512 using byte indexes in zmm2 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Doubleword/word permute group. VPERMD (opcode 36, W0) has one VEX 256-bit AVX2 form (RVM encoding) and EVEX 256/512-bit forms (FV encoding). VPERMW (opcode 8D, W1) has EVEX 128/256/512-bit AVX512BW forms (FVM encoding). Only the RVM entry takes its second operand from VEX.vvvv; the FV and FVM entries describe EVEX-encoded forms and therefore name EVEX.vvvv. -->
		<brief>VPERMD/VPERMW--Permute Packed Doublewords/Words Elements.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 36 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Permute doublewords in ymm3/m256 using indices in ymm2 and store the result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 36 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute doublewords in ymm3/m256/m32bcst using indexes in ymm2 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 36 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute doublewords in zmm3/m512/m32bcst using indices in zmm2 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.W1 8D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Permute word integers in xmm3/m128 using indexes in xmm2 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.W1 8D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Permute word integers in ymm3/m256 using indexes in ymm2 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.W1 8D /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Permute word integers in zmm3/m512 using indexes in zmm2 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VPERMI2B: three EVEX.DDS forms (128/256/512-bit, W0, opcode 75). Every form lists AVX512VBMI; the 128/256-bit forms also list AVX512VL. In the FVM encoding below operand 1 (ModRM:reg) is both read and written, matching the descriptions in which the first register supplies the indexes and receives the result. -->
		<brief>VPERMI2B--Full Permute of Bytes From Two Tables Overwriting the Index.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2B</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.DDS.128.66.0F38.W0 75 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512VBMI</flag>
			</cpuid>
			<dscrp>Permute bytes in xmm3/m128 and xmm2 using byte indexes in xmm1 and store the byte results in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2B</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.DDS.256.66.0F38.W0 75 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512VBMI</flag>
			</cpuid>
			<dscrp>Permute bytes in ymm3/m256 and ymm2 using byte indexes in ymm1 and store the byte results in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2B</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.DDS.512.66.0F38.W0 75 /r</opc>
			<cpuid>
				<flag>AVX512VBMI</flag>
			</cpuid>
			<dscrp>Permute bytes in zmm3/m512 and zmm2 using byte indexes in zmm1 and store the byte results in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Two-table permute group that overwrites the index register. All forms are EVEX.DDS at 128/256/512 bits: VPERMI2W (W1, opcode 75, AVX512BW, FVM encoding), VPERMI2D (W0, 76), VPERMI2Q (W1, 76), VPERMI2PS (W0, 77) and VPERMI2PD (W1, 77), the last four listing AVX512F with the FV encoding. The 128/256-bit forms also list AVX512VL. Both encodings below mark operand 1 (ModRM:reg) as read and written. -->
		<brief>VPERMI2W/D/Q/PS/PD--Full Permute From Two Tables Overwriting the Index.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2W</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.DDS.128.66.0F38.W1 75 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Permute word integers from two tables in xmm3/m128 and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2W</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.DDS.256.66.0F38.W1 75 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Permute word integers from two tables in ymm3/m256 and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2W</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.DDS.512.66.0F38.W1 75 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Permute word integers from two tables in zmm3/m512 and zmm2 using indexes in zmm1 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2D</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W0 76 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-words from two tables in xmm3/m128/m32bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2D</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W0 76 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-words from two tables in ymm3/m256/m32bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2D</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W0 76 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-words from two tables in zmm3/m512/m32bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2Q</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 76 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute quad-words from two tables in xmm3/m128/m64bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2Q</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 76 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute quad-words from two tables in ymm3/m256/m64bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2Q</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 76 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute quad-words from two tables in zmm3/m512/m64bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W0 77 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision FP values from two tables in xmm3/m128/m32bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W0 77 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision FP values from two tables in ymm3/m256/m32bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W0 77 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision FP values from two tables in zmm3/m512/m32bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 77 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision FP values from two tables in xmm3/m128/m64bcst and xmm2 using indexes in xmm1 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 77 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision FP values from two tables in ymm3/m256/m64bcst and ymm2 using indexes in ymm1 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMI2PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 77 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision FP values from two tables in zmm3/m512/m64bcst and zmm2 using indices in zmm1 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VPERMT2B: three EVEX forms (128/256/512-bit, W0, opcode 7D). Every form lists AVX512VBMI; the 128/256-bit forms also list AVX512VL. Operand 1 (ModRM:reg) is one of the two source tables as well as the destination, as the (r,w) marking in the FVM encoding below shows, so all three vector lengths use the destructive EVEX.DDS prefix notation. -->
		<brief>VPERMT2B--Full Permute of Bytes From Two Tables Overwriting a Table.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2B</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.DDS.128.66.0F38.W0 7D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512VBMI</flag>
			</cpuid>
			<dscrp>Permute bytes in xmm3/m128 and xmm1 using byte indexes in xmm2 and store the byte results in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2B</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.DDS.256.66.0F38.W0 7D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512VBMI</flag>
			</cpuid>
			<dscrp>Permute bytes in ymm3/m256 and ymm1 using byte indexes in ymm2 and store the byte results in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2B</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.DDS.512.66.0F38.W0 7D /r</opc>
			<cpuid>
				<flag>AVX512VBMI</flag>
			</cpuid>
			<dscrp>Permute bytes in zmm3/m512 and zmm1 using byte indexes in zmm2 and store the byte results in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Two-table permute group that overwrites one of the tables. All forms are EVEX.DDS at 128/256/512 bits: VPERMT2W (W1, opcode 7D, AVX512BW, FVM encoding), VPERMT2D (W0, 7E), VPERMT2Q (W1, 7E), VPERMT2PS (W0, 7F) and VPERMT2PD (W1, 7F), the last four listing AVX512F with the FV encoding. The 128/256-bit forms also list AVX512VL. Both encodings below mark operand 1 (ModRM:reg) as read and written. -->
		<brief>VPERMT2W/D/Q/PS/PD--Full Permute from Two Tables Overwriting one Table.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2W</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.DDS.128.66.0F38.W1 7D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Permute word integers from two tables in xmm3/m128 and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2W</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.DDS.256.66.0F38.W1 7D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Permute word integers from two tables in ymm3/m256 and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2W</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.DDS.512.66.0F38.W1 7D /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Permute word integers from two tables in zmm3/m512 and zmm1 using indexes in zmm2 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2D</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W0 7E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-words from two tables in xmm3/m128/m32bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2D</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W0 7E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-words from two tables in ymm3/m256/m32bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2D</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W0 7E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-words from two tables in zmm3/m512/m32bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2Q</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 7E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute quad-words from two tables in xmm3/m128/m64bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2Q</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 7E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute quad-words from two tables in ymm3/m256/m64bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2Q</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 7E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute quad-words from two tables in zmm3/m512/m64bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2PS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision FP values from two tables in xmm3/m128/m32bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2PS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision FP values from two tables in ymm3/m256/m32bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2PS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W0 7F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision FP values from two tables in zmm3/m512/m32bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2PD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision FP values from two tables in xmm3/m128/m64bcst and xmm1 using indexes in xmm2 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2PD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision FP values from two tables in ymm3/m256/m64bcst and ymm1 using indexes in ymm2 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMT2PD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 7F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision FP values from two tables in zmm3/m512/m64bcst and zmm1 using indices in zmm2 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VPERMILPD group. Register-control forms use opcode 0D in map 0F38: VEX 128/256-bit (W0, AVX, RVM encoding) and EVEX 128/256/512-bit (W1, AVX512F, plus AVX512VL for 128/256). Immediate-control forms use opcode 05 in map 0F3A with a trailing ib: VEX 128/256-bit (W0, AVX, RM encoding) and EVEX 128/256/512-bit (W1). -->
		<!-- NOTE(review): every EVEX form below is tagged openc="FV", but this group defines no oprndenc with that exact key; the EVEX encodings are keyed "FV-RVM" and "FV-RM". Presumably the three-operand register-control forms map to FV-RVM and the imm8 forms to FV-RM. Confirm how consumers resolve the key before changing either side. -->
		<brief>VPERMILPD--Permute In-Lane of Pairs of Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 0D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point values in xmm2 using controls from xmm3/m128 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 0D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point values in ymm2 using controls from ymm3/m256 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 0D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point values in xmm2 using control from xmm3/m128/m64bcst and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 0D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point values in ymm2 using control from ymm3/m256/m64bcst and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 0D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point values in zmm2 using control from zmm3/m512/m64bcst and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPD</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RM">VEX.128.66.0F3A.W0 05 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point values in xmm2/m128 using controls from imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPD</mnem>
			<args>ymm1,ymm2/m256,imm8</args>
			<opc openc="RM">VEX.256.66.0F3A.W0 05 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point values in ymm2/m256 using controls from imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.128.66.0F3A.W1 05 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point values in xmm2/m128/m64bcst using controls from imm8 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.256.66.0F3A.W1 05 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point values in ymm2/m256/m64bcst using controls from imm8 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst,imm8</args>
			<opc openc="FV">EVEX.512.66.0F3A.W1 05 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point values in zmm2/m512/m64bcst using controls from imm8 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV-RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VPERMILPS group: VEX forms (AVX) and EVEX forms (AVX512F, plus AVX512VL on the 128/256-bit forms).
	     Every opc openc value names one of the oprndenc tables at the end of this group.
	     The EVEX forms point at FV-RVM (control taken from a vector/memory operand) or
	     FV-RM (control taken from imm8), which are the only FV tables this group defines. -->
	<common>
		<brief>VPERMILPS--Permute In-Lane of Quadruples of Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 0C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values in xmm2 using controls from xmm3/m128 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPS</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RM">VEX.128.66.0F3A.W0 04 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values in xmm2/m128 using controls from imm8 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 0C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values in ymm2 using controls from ymm3/m256 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPS</mnem>
			<args>ymm1,ymm2/m256,imm8</args>
			<opc openc="RM">VEX.256.66.0F3A.W0 04 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values in ymm2/m256 using controls from imm8 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.128.66.0F38.W0 0C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values xmm2 using control from xmm3/m128/m32bcst and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.256.66.0F38.W0 0C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values ymm2 using control from ymm3/m256/m32bcst and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.512.66.0F38.W0 0C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values zmm2 using control from zmm3/m512/m32bcst and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst,imm8</args>
			<opc openc="FV-RM">EVEX.128.66.0F3A.W0 04 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values xmm2/m128/m32bcst using controls from imm8 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst,imm8</args>
			<opc openc="FV-RM">EVEX.256.66.0F3A.W0 04 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values ymm2/m256/m32bcst using controls from imm8 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMILPS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst,imm8</args>
			<opc openc="FV-RM">EVEX.512.66.0F3A.W0 04 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values zmm2/m512/m32bcst using controls from imm8 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV-RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV-RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VPERMPD group: one VEX form (AVX2, imm8 indices) and four EVEX forms
	     (AVX512F, plus AVX512VL on the 256-bit forms).
	     Every opc openc value names one of the oprndenc tables at the end of this group:
	     the EVEX imm8 forms use FV-RMI and the EVEX vector-index forms use FV-RVM. -->
	<common>
		<brief>VPERMPD--Permute Double-Precision Floating-Point Elements.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMPD</mnem>
			<args>ymm1,ymm2/m256,imm8</args>
			<opc openc="RMI">VEX.256.66.0F3A.W1 01 /r ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point elements in ymm2/m256 using indices in imm8 and store the result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMPD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FV-RMI">EVEX.256.66.0F3A.W1 01 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point elements in ymm2/m256/m64bcst using indexes in imm8 and store the result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMPD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst,imm8</args>
			<opc openc="FV-RMI">EVEX.512.66.0F3A.W1 01 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point elements in zmm2/m512/m64bcst using indices in imm8 and store the result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.256.66.0F38.W1 16 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point elements in ymm3/m256/m64bcst using indexes in ymm2 and store the result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.512.66.0F38.W1 16 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute double-precision floating-point elements in zmm3/m512/m64bcst using indices in zmm2 and store the result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV-RMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV-RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VPERMPS group: one VEX form (AVX2) and two EVEX forms
	     (ymm: AVX512VL + AVX512F; zmm: AVX512F only).
	     Per the descriptions, operand 2 supplies the indices and operand 3 (register or memory)
	     supplies the elements being permuted. The VEX form uses the RVM operand table and the
	     EVEX forms use the FV table, both listed at the end of the group. -->
	<common>
		<brief>VPERMPS--Permute Single-Precision Floating-Point Elements.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.256.66.0F38.W0 16 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point elements in ymm3/m256 using indices in ymm2 and store the result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 16 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point elements in ymm3/m256/m32bcst using indexes in ymm2 and store the result in ymm1 subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 16 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute single-precision floating-point values in zmm3/m512/m32bcst using indices in zmm2 and store the result in zmm1 subject to write mask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VPERMQ group: one VEX form (AVX2, imm8 indices) and four EVEX forms
	     (AVX512F, plus AVX512VL on the 256-bit forms).
	     Every opc openc value names one of the oprndenc tables at the end of this group:
	     the EVEX imm8 forms use FV-RMI and the EVEX vector-index forms use FV-RVM. -->
	<common>
		<brief>VPERMQ--Qwords Element Permutation.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPERMQ</mnem>
			<args>ymm1,ymm2/m256,imm8</args>
			<opc openc="RMI">VEX.256.66.0F3A.W1 00 /r ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Permute qwords in ymm2/m256 using indices in imm8 and store the result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FV-RMI">EVEX.256.66.0F3A.W1 00 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute qwords in ymm2/m256/m64bcst using indexes in imm8 and store the result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst,imm8</args>
			<opc openc="FV-RMI">EVEX.512.66.0F3A.W1 00 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute qwords in zmm2/m512/m64bcst using indices in imm8 and store the result in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.256.66.0F38.W1 36 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute qwords in ymm3/m256/m64bcst using indexes in ymm2 and store the result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPERMQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.512.66.0F38.W1 36 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Permute qwords in zmm3/m512/m64bcst using indices in zmm2 and store the result in zmm1.</dscrp>
		</ins>
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV-RMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV-RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VPEXPANDD group: three EVEX.W0 forms (128/256/512-bit) sharing opcode 89 in the 0F38 map.
	     The xmm and ymm forms list AVX512VL + AVX512F; the zmm form lists AVX512F only.
	     All forms use the single T1S operand table at the end of the group
	     (destination in ModRM:reg, source in ModRM:r/m). -->
	<common>
		<brief>VPEXPANDD--Load Sparse Packed Doubleword Integer Values from Dense Memory / Register.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPEXPANDD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 89 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed double-word integer values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXPANDD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 89 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed double-word integer values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXPANDD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 89 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed double-word integer values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VPEXPANDQ group: three EVEX.W1 forms (128/256/512-bit) sharing opcode 89 in the 0F38 map.
	     The xmm and ymm forms list AVX512VL + AVX512F; the zmm form lists AVX512F only.
	     All forms use the single T1S operand table at the end of the group
	     (destination in ModRM:reg, source in ModRM:r/m). -->
	<common>
		<brief>VPEXPANDQ--Load Sparse Packed Quadword Integer Values from Dense Memory / Register.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPEXPANDQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 89 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed quad-word integer values from xmm2/m128 to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXPANDQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 89 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed quad-word integer values from ymm2/m256 to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXPANDQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 89 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Expand packed quad-word integer values from zmm2/m512 to zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- PEXTRB/PEXTRW/PEXTRD/PEXTRQ group: legacy SSE forms, VEX forms (AVX) and EVEX forms
	     (AVX512BW for byte/word, AVX512DQ for dword/qword).
	     Every opc openc value names one of the oprndenc tables at the end of this group;
	     the EVEX forms that write ModRM:r/m use T1S-MRI, the only T1S table defined here.
	     The qword forms index the source by qword, so their descriptions say "qword offset".
	     NOTE(review): x32m is "NE" on PEXTRQ and VEX VPEXTRQ but "NE1" on EVEX VPEXTRQ;
	     presumably the trailing 1 is a footnote marker carried over from the Intel table.
	     Confirm against AVX512_Rules.dtd before normalizing it. -->
	<common>
		<brief>PEXTRB/PEXTRW/PEXTRD/PEXTRQ--Extract Integer.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PEXTRB</mnem>
			<args>reg/m8,xmm2,imm8</args>
			<opc openc="MRI">66 0F 3A 14 /r ib</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r64/r32 is filled with zeros.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PEXTRW</mnem>
			<args>reg,xmm1,imm8</args>
			<opc openc="RMI">66 0F C5 /r ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Extract the word specified by imm8 from xmm1 and move it to reg, bits 15:0. The upper bits of r64/r32 is filled with zeros.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PEXTRW</mnem>
			<args>reg/m16,xmm2,imm8</args>
			<opc openc="MRI">66 0F 3A 15 /r ib</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Extract a word integer value from xmm2 at the source word offset specified by imm8 into reg or m16. The upper bits of r64/r32 is filled with zeros.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PEXTRD</mnem>
			<args>r32/m32,xmm2,imm8</args>
			<opc openc="MRI">66 0F 3A 16 /r ib</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r32/m32.</dscrp>
		</ins>
		<ins x32m="NE" x64m="V">
			<mnem>PEXTRQ</mnem>
			<args>r64/m64,xmm2,imm8</args>
			<opc openc="MRI">66 REX.W 0F 3A 16 /r ib</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Extract a qword integer value from xmm2 at the source qword offset specified by imm8 into r64/m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXTRB</mnem>
			<args>reg/m8,xmm2,imm8</args>
			<opc openc="MRI">VEX.128.66.0F3A 14 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r64/r32 is filled with zeros.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXTRW</mnem>
			<args>reg,xmm1,imm8</args>
			<opc openc="RMI">VEX.128.66.0F C5 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Extract the word specified by imm8 from xmm1 and move it to reg, bits 15:0. Zero-extend the result. The upper bits of r64/r32 is filled with zeros.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXTRW</mnem>
			<args>reg/m16,xmm2,imm8</args>
			<opc openc="MRI">VEX.128.66.0F3A 15 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Extract a word integer value from xmm2 at the source word offset specified by imm8 into reg or m16. The upper bits of r64/r32 is filled with zeros.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXTRD</mnem>
			<args>r32/m32,xmm2,imm8</args>
			<opc openc="MRI">VEX.128.66.0F3A.W0 16 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r32/m32.</dscrp>
		</ins>
		<ins x32m="NE" x64m="V">
			<mnem>VPEXTRQ</mnem>
			<args>r64/m64,xmm2,imm8</args>
			<opc openc="MRI">VEX.128.66.0F3A.W1 16 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Extract a qword integer value from xmm2 at the source qword offset specified by imm8 into r64/m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXTRB</mnem>
			<args>reg/m8,xmm2,imm8</args>
			<opc openc="T1S-MRI">EVEX.128.66.0F3A.WIG 14 /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Extract a byte integer value from xmm2 at the source byte offset specified by imm8 into reg or m8. The upper bits of r64/r32 is filled with zeros.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXTRW</mnem>
			<args>reg,xmm1,imm8</args>
			<opc openc="RMI">EVEX.128.66.0F.WIG C5 /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Extract the word specified by imm8 from xmm1 and move it to reg, bits 15:0. Zero-extend the result. The upper bits of r64/r32 is filled with zeros.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXTRW</mnem>
			<args>reg/m16,xmm2,imm8</args>
			<opc openc="T1S-MRI">EVEX.128.66.0F3A.WIG 15 /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Extract a word integer value from xmm2 at the source word offset specified by imm8 into reg or m16. The upper bits of r64/r32 is filled with zeros.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPEXTRD</mnem>
			<args>r32/m32,xmm2,imm8</args>
			<opc openc="T1S-MRI">EVEX.128.66.0F3A.W0 16 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Extract a dword integer value from xmm2 at the source dword offset specified by imm8 into r32/m32.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VPEXTRQ</mnem>
			<args>r64/m64,xmm2,imm8</args>
			<opc openc="T1S-MRI">EVEX.128.66.0F3A.W1 16 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Extract a qword integer value from xmm2 at the source qword offset specified by imm8 into r64/m64.</dscrp>
		</ins>
		<oprndenc openc="MRI">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S-MRI">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- VPLZCNTD/VPLZCNTQ group: EVEX-only forms sharing opcode 44 in the 0F38 map.
	     VPLZCNTD uses W0 with m32bcst and VPLZCNTQ uses W1 with m64bcst.
	     The xmm and ymm forms list AVX512VL + AVX512CD; the zmm forms list AVX512CD only.
	     All forms use the single two-operand FV table at the end of the group. -->
	<common>
		<brief>VPLZCNTD/Q--Count the Number of Leading Zero Bits for Packed Dword, Packed Qword Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPLZCNTD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W0 44 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Count the number of leading zero bits in each dword element of xmm2/m128/m32bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPLZCNTD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W0 44 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Count the number of leading zero bits in each dword element of ymm2/m256/m32bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPLZCNTD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst</args>
			<opc openc="FV">EVEX.512.66.0F38.W0 44 /r</opc>
			<cpuid>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Count the number of leading zero bits in each dword element of zmm2/m512/m32bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPLZCNTQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W1 44 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Count the number of leading zero bits in each qword element of xmm2/m128/m64bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPLZCNTQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W1 44 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Count the number of leading zero bits in each qword element of ymm2/m256/m64bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPLZCNTQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst</args>
			<opc openc="FV">EVEX.512.66.0F38.W1 44 /r</opc>
			<cpuid>
				<flag>AVX512CD</flag>
			</cpuid>
			<dscrp>Count the number of leading zero bits in each qword element of zmm2/m512/m64bcst using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- PMADDUBSW group: legacy SSSE3 form, VEX forms (AVX for xmm, AVX2 for ymm) and EVEX forms
	     (AVX512BW, plus AVX512VL on the 128/256-bit forms).
	     In the RM table operand 1 is both read and written; in RVM and FVM operand 1 is
	     write-only and the first source comes from vvvv.
	     Each description names the destination register that matches the form's args. -->
	<common>
		<brief>PMADDUBSW--Multiply and Add Packed Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMADDUBSW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 04 /r</opc>
			<cpuid>
				<flag>SSSE3</flag>
			</cpuid>
			<dscrp>Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADDUBSW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38 04 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADDUBSW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38 04 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADDUBSW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.WIG 04 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADDUBSW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.WIG 04 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADDUBSW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.WIG 04 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply signed and unsigned bytes, add horizontal pair of signed words, pack saturated signed-words to zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- PMADDWD group: legacy SSE2 form, VEX forms (AVX for xmm, AVX2 for ymm) and EVEX forms
	     (AVX512BW, plus AVX512VL on the 128/256-bit forms), all with opcode F5 in the 0F map.
	     In the RM table operand 1 is both read and written; in RVM and FVM operand 1 is
	     write-only and the first source comes from vvvv. -->
	<common>
		<brief>PMADDWD--Multiply and Add Packed Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMADDWD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F F5 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Multiply the packed word integers in xmm1 by the packed word integers in xmm2/m128, add adjacent doubleword results, and store in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADDWD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F F5 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply the packed word integers in xmm2 by the packed word integers in xmm3/m128, add adjacent doubleword results, and store in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADDWD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F F5 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Multiply the packed word integers in ymm2 by the packed word integers in ymm3/m256, add adjacent doubleword results, and store in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADDWD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG F5 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed word integers in xmm2 by the packed word integers in xmm3/m128, add adjacent doubleword results, and store in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADDWD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG F5 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed word integers in ymm2 by the packed word integers in ymm3/m256, add adjacent doubleword results, and store in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADDWD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG F5 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed word integers in zmm2 by the packed word integers in zmm3/m512, add adjacent doubleword results, and store in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!-- PINSRB/PINSRW/PINSRD/PINSRQ group: legacy SSE forms (RMI table), VEX forms (RVMI table,
	     AVX) and EVEX forms (T1S table; AVX512BW for byte/word, AVX512DQ for dword/qword).
	     The legacy forms read and write operand 1; the VEX and EVEX forms write operand 1 and
	     take the remaining elements from the vvvv operand, with imm8 as operand 4.
	     NOTE(review): x32m is "NE" on PINSRQ but "NE1" on both VPINSRQ forms; presumably the
	     trailing 1 is a footnote marker carried over from the Intel table.
	     Confirm against AVX512_Rules.dtd before normalizing it. -->
	<common>
		<brief>PINSRB/PINSRW/PINSRD/PINSRQ--Insert Integer.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PINSRB</mnem>
			<args>xmm1,r32/m8,imm8</args>
			<opc openc="RMI">66 0F 3A 20 /r ib</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Insert a byte integer value from r32/m8 into xmm1 at the byte offset in imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PINSRW</mnem>
			<args>xmm1,r32/m16,imm8</args>
			<opc openc="RMI">66 0F C4 /r ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Insert a word integer value from r32/m16 into xmm1 at the word offset in imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PINSRD</mnem>
			<args>xmm1,r32/m32,imm8</args>
			<opc openc="RMI">66 0F 3A 22 /r ib</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Insert a dword integer value from r32/m32 into xmm1 at the dword offset in imm8.</dscrp>
		</ins>
		<ins x32m="NE" x64m="V">
			<mnem>PINSRQ</mnem>
			<args>xmm1,r64/m64,imm8</args>
			<opc openc="RMI">66 REX.W 0F 3A 22 /r ib</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Insert a qword integer value from r64/m64 into xmm1 at the qword offset in imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPINSRB</mnem>
			<args>xmm1,xmm2,r32/m8,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.66.0F3A 20 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Merge a byte integer value from r32/m8 and rest from xmm2 into xmm1 at the byte offset in imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPINSRW</mnem>
			<args>xmm1,xmm2,r32/m16,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.66.0F C4 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Insert a word integer value from r32/m16 and rest from xmm2 into xmm1 at the word offset in imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPINSRD</mnem>
			<args>xmm1,xmm2,r32/m32,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.66.0F3A.W0 22 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Insert a dword integer value from r32/m32 and rest from xmm2 into xmm1 at the dword offset in imm8.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VPINSRQ</mnem>
			<args>xmm1,xmm2,r64/m64,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.66.0F3A.W1 22 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Insert a qword integer value from r64/m64 and rest from xmm2 into xmm1 at the qword offset in imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPINSRB</mnem>
			<args>xmm1,xmm2,r32/m8,imm8</args>
			<opc openc="T1S">EVEX.NDS.128.66.0F3A.WIG 20 /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Merge a byte integer value from r32/m8 and rest from xmm2 into xmm1 at the byte offset in imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPINSRW</mnem>
			<args>xmm1,xmm2,r32/m16,imm8</args>
			<opc openc="T1S">EVEX.NDS.128.66.0F.WIG C4 /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Insert a word integer value from r32/m16 and rest from xmm2 into xmm1 at the word offset in imm8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPINSRD</mnem>
			<args>xmm1,xmm2,r32/m32,imm8</args>
			<opc openc="T1S">EVEX.NDS.128.66.0F3A.W0 22 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Insert a dword integer value from r32/m32 and rest from xmm2 into xmm1 at the dword offset in imm8.</dscrp>
		</ins>
		<ins x32m="NE1" x64m="V">
			<mnem>VPINSRQ</mnem>
			<args>xmm1,xmm2,r64/m64,imm8</args>
			<opc openc="T1S">EVEX.NDS.128.66.0F3A.W1 22 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Insert a qword integer value from r64/m64 and rest from xmm2 into xmm1 at the qword offset in imm8.</dscrp>
		</ins>
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!-- VPMADD52LUQ group: three EVEX.W1 forms (128/256/512-bit), opcode B4 in the 0F38 map.
	     All forms list AVX512IFMA; the xmm and ymm forms also list AVX512VL.
	     Operand 1 is both read and written (it is the accumulator), per the FV table below.
	     Each description names the memory operand width that matches the form's args. -->
	<common>
		<brief>VPMADD52LUQ--Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit Products to Qword Accumulators.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMADD52LUQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 B4 /r</opc>
			<cpuid>
				<flag>AVX512IFMA</flag>
				<flag>AVX512VL</flag>
			</cpuid>
			<dscrp>Multiply unsigned 52-bit integers in xmm2 and xmm3/m128 and add the low 52 bits of the 104-bit product to the qword unsigned integers in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADD52LUQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 B4 /r</opc>
			<cpuid>
				<flag>AVX512IFMA</flag>
				<flag>AVX512VL</flag>
			</cpuid>
			<dscrp>Multiply unsigned 52-bit integers in ymm2 and ymm3/m256 and add the low 52 bits of the 104-bit product to the qword unsigned integers in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADD52LUQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 B4 /r</opc>
			<cpuid>
				<flag>AVX512IFMA</flag>
			</cpuid>
			<dscrp>Multiply unsigned 52-bit integers in zmm2 and zmm3/m512 and add the low 52 bits of the 104-bit product to the qword unsigned integers in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VPMADD52HUQ: EVEX-encoded forms for 128-, 256- and 512-bit vectors. Every form lists
		     AVX512IFMA; the 128- and 256-bit forms additionally list AVX512VL. Operand 1 is both
		     read and written (ModRM:reg(r,w)) because the high product bits are added into it. -->
		<brief>VPMADD52HUQ--Packed Multiply of Unsigned 52-bit Integers and Add High 52-bit Products to 64-bit Accumulators.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMADD52HUQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.DDS.128.66.0F38.W1 B5 /r</opc>
			<cpuid>
				<flag>AVX512IFMA</flag>
				<flag>AVX512VL</flag>
			</cpuid>
			<dscrp>Multiply unsigned 52-bit integers in xmm2 and xmm3/m128 and add the high 52 bits of the 104-bit product to the qword unsigned integers in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADD52HUQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.DDS.256.66.0F38.W1 B5 /r</opc>
			<cpuid>
				<flag>AVX512IFMA</flag>
				<flag>AVX512VL</flag>
			</cpuid>
			<dscrp>Multiply unsigned 52-bit integers in ymm2 and ymm3/m256 and add the high 52 bits of the 104-bit product to the qword unsigned integers in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMADD52HUQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.DDS.512.66.0F38.W1 B5 /r</opc>
			<cpuid>
				<flag>AVX512IFMA</flag>
			</cpuid>
			<dscrp>Multiply unsigned 52-bit integers in zmm2 and zmm3/m512 and add the high 52 bits of the 104-bit product to the qword unsigned integers in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Signed packed maximum for byte, word, dword and qword elements.
		     Encodings listed: legacy SSE (openc RM, destination is also a source), VEX (openc RVM),
		     and EVEX (openc FVM for byte/word forms, openc FV for the dword/qword forms whose
		     memory operand may be an m32bcst/m64bcst broadcast). VPMAXSQ appears only in EVEX form. -->
		<brief>PMAXSB/PMAXSW/PMAXSD/PMAXSQ--Maximum of Packed Signed Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMAXSB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 3C /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMAXSW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F EE /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in xmm2/m128 and xmm1 and store maximum packed values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMAXSD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 3D /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.WIG 3C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG EE /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in xmm3/m128 and xmm2 and store packed maximum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.WIG 3D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.WIG 3C /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG EE /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in ymm3/m256 and ymm2 and store packed maximum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.WIG 3D /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSB</mnem>
			<args>xmm1{k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.WIG 3C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSB</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.WIG 3C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSB</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.WIG 3C /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSW</mnem>
			<args>xmm1{k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG EE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSW</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG EE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSW</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG EE /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 3D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in xmm2 and xmm3/m128/m32bcst and store packed maximum values in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 3D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in ymm2 and ymm3/m256/m32bcst and store packed maximum values in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 3D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in zmm2 and zmm3/m512/m32bcst and store packed maximum values in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 3D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed qword integers in xmm2 and xmm3/m128/m64bcst and store packed maximum values in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 3D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed qword integers in ymm2 and ymm3/m256/m64bcst and store packed maximum values in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXSQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 3D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed qword integers in zmm2 and zmm3/m512/m64bcst and store packed maximum values in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Unsigned packed maximum for byte and word elements.
		     Encodings listed: legacy SSE (openc RM, destination is also a source), VEX (openc RVM)
		     and EVEX (openc FVM, AVX512BW with AVX512VL for the 128/256-bit forms).
		     Opcode strings keep a space between the opcode byte and the /r marker in every row. -->
		<brief>PMAXUB/PMAXUW--Maximum of Packed Unsigned Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMAXUB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F DE /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMAXUW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 3E /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in xmm2/m128 and xmm1 and store maximum packed values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F DE /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38 3E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in xmm3/m128 and xmm2 and store maximum packed values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F DE /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38 3E /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in ymm3/m256 and ymm2 and store maximum packed values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUB</mnem>
			<args>xmm1{k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG DE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUB</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG DE /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUB</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG DE /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUW</mnem>
			<args>xmm1{k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.WIG 3E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUW</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.WIG 3E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUW</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.WIG 3E /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in zmm2 and zmm3/m512 and store packed maximum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Unsigned packed maximum for dword and qword elements.
		     Encodings listed: legacy SSE (openc RM, destination is also a source), VEX (openc RVM)
		     and EVEX (openc FV, memory operand may be an m32bcst/m64bcst broadcast).
		     VPMAXUQ appears only in EVEX form. The vvvv operand is a read-only source, so it
		     carries the (r) annotation in both the RVM and FV tables. -->
		<brief>PMAXUD/PMAXUQ--Maximum of Packed Unsigned Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMAXUD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 3F /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in xmm1 and xmm2/m128 and store packed maximum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.WIG 3F /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in xmm2 and xmm3/m128 and store packed maximum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.WIG 3F /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in ymm2 and ymm3/m256 and store packed maximum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 3F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in xmm2 and xmm3/m128/m32bcst and store packed maximum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 3F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in ymm2 and ymm3/m256/m32bcst and store packed maximum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 3F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in zmm2 and zmm3/m512/m32bcst and store packed maximum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 3F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned qword integers in xmm2 and xmm3/m128/m64bcst and store packed maximum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 3F /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned qword integers in ymm2 and ymm3/m256/m64bcst and store packed maximum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMAXUQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 3F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned qword integers in zmm2 and zmm3/m512/m64bcst and store packed maximum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Signed packed minimum for byte and word elements.
		     Encodings listed: legacy SSE (openc RM, destination is also a source), VEX (openc RVM)
		     and EVEX (openc FVM, AVX512BW with AVX512VL for the 128/256-bit forms). -->
		<brief>PMINSB/PMINSW--Minimum of Packed Signed Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMINSB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 38 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMINSW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F EA /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in xmm2/m128 and xmm1 and store packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38 38 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F EA /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38 38 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F EA /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSB</mnem>
			<args>xmm1{k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.WIG 38 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSB</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.WIG 38 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSB</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.WIG 38 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed byte integers in zmm2 and zmm3/m512 and store packed minimum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSW</mnem>
			<args>xmm1{k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG EA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSW</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG EA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSW</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG EA /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed signed word integers in zmm2 and zmm3/m512 and store packed minimum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Signed packed minimum for dword and qword elements.
		     Encodings listed: legacy SSE (openc RM, destination is also a source), VEX (openc RVM)
		     and EVEX (openc FV, memory operand may be an m32bcst/m64bcst broadcast).
		     VPMINSQ appears only in EVEX form. Each description names the same source operand
		     forms as the matching args element. -->
		<brief>PMINSD/PMINSQ--Minimum of Packed Signed Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMINSD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 39 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.WIG 39 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.WIG 39 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 39 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in xmm2 and xmm3/m128/m32bcst and store packed minimum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 39 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in ymm2 and ymm3/m256/m32bcst and store packed minimum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 39 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed dword integers in zmm2 and zmm3/m512/m32bcst and store packed minimum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 39 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed qword integers in xmm2 and xmm3/m128/m64bcst and store packed minimum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 39 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed qword integers in ymm2 and ymm3/m256/m64bcst and store packed minimum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINSQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 39 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed signed qword integers in zmm2 and zmm3/m512/m64bcst and store packed minimum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Unsigned packed minimum for byte and word elements.
		     Encodings listed: legacy SSE (openc RM, destination is also a source), VEX (openc RVM)
		     and EVEX (openc FVM, AVX512BW with AVX512VL for the 128/256-bit forms).
		     Opcode strings keep a space between the opcode byte and the /r marker in every row.
		     NOTE(review): the EVEX rows here carry no .WIG field, unlike the EVEX VPMAXUB rows;
		     left as transcribed, confirm against the reference document. -->
		<brief>PMINUB/PMINUW--Minimum of Packed Unsigned Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMINUB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F DA /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMINUW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 3A /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in xmm2/m128 and xmm1 and store packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F DA /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38 3A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F DA /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38 3A /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F DA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F DA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F DA /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned byte integers in zmm2 and zmm3/m512 and store packed minimum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUW</mnem>
			<args>xmm1{k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38 3A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in xmm3/m128 and xmm2 and return packed minimum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUW</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38 3A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in ymm3/m256 and ymm2 and return packed minimum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUW</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38 3A /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Compare packed unsigned word integers in zmm3/m512 and zmm2 and return packed minimum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  Packed unsigned dword/qword minimum.
	  Entries, in order: the legacy SSE4_1 PMINUD form (RM encoding, destination is also a source),
	  the VEX-encoded VPMINUD 128/256-bit forms (RVM encoding), and the EVEX-encoded
	  VPMINUD/VPMINUQ forms (FV encoding) that add a {k1}{z} writemask and an
	  m32bcst/m64bcst broadcast source.
	  All EVEX forms share opcode byte 3B; W0 selects VPMINUD and W1 selects VPMINUQ.
	  EVEX 128/256-bit forms list AVX512VL together with AVX512F; 512-bit forms list AVX512F only.
	 -->
	<common>
		<brief>PMINUD/PMINUQ--Minimum of Packed Unsigned Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMINUD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 3B /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in xmm1 and xmm2/m128 and store packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.WIG 3B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in xmm2 and xmm3/m128 and store packed minimum values in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.WIG 3B /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in ymm2 and ymm3/m256 and store packed minimum values in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 3B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in xmm2 and xmm3/m128/m32bcst and store packed minimum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 3B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in ymm2 and ymm3/m256/m32bcst and store packed minimum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 3B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned dword integers in zmm2 and zmm3/m512/m32bcst and store packed minimum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 3B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned qword integers in xmm2 and xmm3/m128/m64bcst and store packed minimum values in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 3B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned qword integers in ymm2 and ymm3/m256/m64bcst and store packed minimum values in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMINUQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 3B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare packed unsigned qword integers in zmm2 and zmm3/m512/m64bcst and store packed minimum values in zmm1 under writemask k1.</dscrp>
		</ins>
		<!-- RM: operand 1 (ModRM:reg) is read and written; operand 2 (ModRM:r/m) is read. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- RVM: destination in ModRM:reg, first source in VEX.vvvv, second source in ModRM:r/m. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- FV: destination in ModRM:reg, first source in EVEX.vvvv, second source in ModRM:r/m. -->
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  Mask register to vector register conversions (destination vector, source k1).
	  Opcode byte 28 covers the byte/word variants (W0 = VPMOVM2B, W1 = VPMOVM2W) and lists AVX512BW.
	  Opcode byte 38 covers the dword/qword variants (W0 = VPMOVM2D, W1 = VPMOVM2Q) and lists AVX512DQ.
	  The 128/256-bit forms additionally list AVX512VL; the 512-bit forms do not.
	  Every form uses the single RM operand encoding defined at the end of the group.
	 -->
	<common>
		<brief>VPMOVM2B/VPMOVM2W/VPMOVM2D/VPMOVM2Q--Convert a Mask Register to a Vector Register.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2B</mnem>
			<args>xmm1,k1</args>
			<opc openc="RM">EVEX.128.F3.0F38.W0 28 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each byte in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2B</mnem>
			<args>ymm1,k1</args>
			<opc openc="RM">EVEX.256.F3.0F38.W0 28 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each byte in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2B</mnem>
			<args>zmm1,k1</args>
			<opc openc="RM">EVEX.512.F3.0F38.W0 28 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each byte in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2W</mnem>
			<args>xmm1,k1</args>
			<opc openc="RM">EVEX.128.F3.0F38.W1 28 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each word in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2W</mnem>
			<args>ymm1,k1</args>
			<opc openc="RM">EVEX.256.F3.0F38.W1 28 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each word in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2W</mnem>
			<args>zmm1,k1</args>
			<opc openc="RM">EVEX.512.F3.0F38.W1 28 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each word in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2D</mnem>
			<args>xmm1,k1</args>
			<opc openc="RM">EVEX.128.F3.0F38.W0 38 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each doubleword in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2D</mnem>
			<args>ymm1,k1</args>
			<opc openc="RM">EVEX.256.F3.0F38.W0 38 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each doubleword in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2D</mnem>
			<args>zmm1,k1</args>
			<opc openc="RM">EVEX.512.F3.0F38.W0 38 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each doubleword in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2Q</mnem>
			<args>xmm1,k1</args>
			<opc openc="RM">EVEX.128.F3.0F38.W1 38 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each quadword in XMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2Q</mnem>
			<args>ymm1,k1</args>
			<opc openc="RM">EVEX.256.F3.0F38.W1 38 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each quadword in YMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVM2Q</mnem>
			<args>zmm1,k1</args>
			<opc openc="RM">EVEX.512.F3.0F38.W1 38 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each quadword in ZMM1 to all 1's or all 0's based on the value of the corresponding bit in k1.</dscrp>
		</ins>
		<!-- RM: the vector destination is in ModRM:reg (write only); the mask source is in ModRM:r/m (read). -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  Vector register to mask register conversions (destination k1, source vector).
	  Opcode byte 29 covers the byte/word variants (W0 = VPMOVB2M, W1 = VPMOVW2M) and lists AVX512BW.
	  Opcode byte 39 covers the dword/qword variants (W0 = VPMOVD2M, W1 = VPMOVQ2M) and lists AVX512DQ.
	  The 128/256-bit forms additionally list AVX512VL; the 512-bit forms do not.
	  Every form uses the single RM operand encoding defined at the end of the group.
	 -->
	<common>
		<brief>VPMOVB2M/VPMOVW2M/VPMOVD2M/VPMOVQ2M--Convert a Vector Register to a Mask.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVB2M</mnem>
			<args>k1,xmm1</args>
			<opc openc="RM">EVEX.128.F3.0F38.W0 29 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding byte in XMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVB2M</mnem>
			<args>k1,ymm1</args>
			<opc openc="RM">EVEX.256.F3.0F38.W0 29 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding byte in YMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVB2M</mnem>
			<args>k1,zmm1</args>
			<opc openc="RM">EVEX.512.F3.0F38.W0 29 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding byte in ZMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVW2M</mnem>
			<args>k1,xmm1</args>
			<opc openc="RM">EVEX.128.F3.0F38.W1 29 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding word in XMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVW2M</mnem>
			<args>k1,ymm1</args>
			<opc openc="RM">EVEX.256.F3.0F38.W1 29 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding word in YMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVW2M</mnem>
			<args>k1,zmm1</args>
			<opc openc="RM">EVEX.512.F3.0F38.W1 29 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding word in ZMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVD2M</mnem>
			<args>k1,xmm1</args>
			<opc openc="RM">EVEX.128.F3.0F38.W0 39 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding doubleword in XMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVD2M</mnem>
			<args>k1,ymm1</args>
			<opc openc="RM">EVEX.256.F3.0F38.W0 39 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding doubleword in YMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVD2M</mnem>
			<args>k1,zmm1</args>
			<opc openc="RM">EVEX.512.F3.0F38.W0 39 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding doubleword in ZMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQ2M</mnem>
			<args>k1,xmm1</args>
			<opc openc="RM">EVEX.128.F3.0F38.W1 39 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding quadword in XMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQ2M</mnem>
			<args>k1,ymm1</args>
			<opc openc="RM">EVEX.256.F3.0F38.W1 39 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding quadword in YMM1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQ2M</mnem>
			<args>k1,zmm1</args>
			<opc openc="RM">EVEX.512.F3.0F38.W1 39 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Sets each bit in k1 to 1 or 0 based on the value of the most significant bit of the corresponding quadword in ZMM1.</dscrp>
		</ins>
		<!-- RM: the mask destination is in ModRM:reg (write only); the vector source is in ModRM:r/m (read). -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  Qword to byte down-conversions, grouped by source vector length (xmm2, ymm2, zmm2).
	  Opcode byte 32 is the truncating VPMOVQB, 22 is the signed-saturating VPMOVSQB and
	  12 is the unsigned-saturating VPMOVUSQB.
	  The memory destination is one eighth of the source width: m16, m32 and m64 respectively.
	  The 128/256-bit forms list AVX512VL together with AVX512F; the 512-bit forms list AVX512F only.
	  Every form uses the OVM operand encoding defined at the end of the group.
	 -->
	<common>
		<brief>VPMOVQB/VPMOVSQB/VPMOVUSQB--Down Convert QWord to Byte.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQB</mnem>
			<args>xmm1/m16 {k1}{z},xmm2</args>
			<opc openc="OVM">EVEX.128.F3.0F38.W0 32 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 2 packed quad-word integers from xmm2 into 2 packed byte integers in xmm1/m16 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSQB</mnem>
			<args>xmm1/m16 {k1}{z},xmm2</args>
			<opc openc="OVM">EVEX.128.F3.0F38.W0 22 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 2 packed signed quad-word integers from xmm2 into 2 packed signed byte integers in xmm1/m16 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSQB</mnem>
			<args>xmm1/m16 {k1}{z},xmm2</args>
			<opc openc="OVM">EVEX.128.F3.0F38.W0 12 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 2 packed unsigned quad-word integers from xmm2 into 2 packed unsigned byte integers in xmm1/m16 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQB</mnem>
			<args>xmm1/m32 {k1}{z},ymm2</args>
			<opc openc="OVM">EVEX.256.F3.0F38.W0 32 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed quad-word integers from ymm2 into 4 packed byte integers in xmm1/m32 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSQB</mnem>
			<args>xmm1/m32 {k1}{z},ymm2</args>
			<opc openc="OVM">EVEX.256.F3.0F38.W0 22 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed signed quad-word integers from ymm2 into 4 packed signed byte integers in xmm1/m32 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSQB</mnem>
			<args>xmm1/m32 {k1}{z},ymm2</args>
			<opc openc="OVM">EVEX.256.F3.0F38.W0 12 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed unsigned quad-word integers from ymm2 into 4 packed unsigned byte integers in xmm1/m32 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQB</mnem>
			<args>xmm1/m64 {k1}{z},zmm2</args>
			<opc openc="OVM">EVEX.512.F3.0F38.W0 32 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed quad-word integers from zmm2 into 8 packed byte integers in xmm1/m64 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSQB</mnem>
			<args>xmm1/m64 {k1}{z},zmm2</args>
			<opc openc="OVM">EVEX.512.F3.0F38.W0 22 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed byte integers in xmm1/m64 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSQB</mnem>
			<args>xmm1/m64 {k1}{z},zmm2</args>
			<opc openc="OVM">EVEX.512.F3.0F38.W0 12 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed unsigned quad-word integers from zmm2 into 8 packed unsigned byte integers in xmm1/m64 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<!-- OVM: the destination is in ModRM:r/m (write only, register or memory); the source vector is in ModRM:reg (read). -->
		<oprndenc openc="OVM">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  Qword to word down-conversions, grouped by source vector length (xmm2, ymm2, zmm2).
	  Opcode byte 34 is the truncating VPMOVQW, 24 is the signed-saturating VPMOVSQW and
	  14 is the unsigned-saturating VPMOVUSQW.
	  The memory destination is one quarter of the source width: m32, m64 and m128 respectively,
	  so the element counts in the descriptions are 2, 4 and 8.
	  The 128/256-bit forms list AVX512VL together with AVX512F; the 512-bit forms list AVX512F only.
	  Every form uses the QVM operand encoding defined at the end of the group.
	 -->
	<common>
		<brief>VPMOVQW/VPMOVSQW/VPMOVUSQW--Down Convert QWord to Word.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQW</mnem>
			<args>xmm1/m32 {k1}{z},xmm2</args>
			<opc openc="QVM">EVEX.128.F3.0F38.W0 34 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 2 packed quad-word integers from xmm2 into 2 packed word integers in xmm1/m32 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSQW</mnem>
			<args>xmm1/m32 {k1}{z},xmm2</args>
			<opc openc="QVM">EVEX.128.F3.0F38.W0 24 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 2 packed signed quad-word integers from xmm2 into 2 packed signed word integers in xmm1/m32 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSQW</mnem>
			<args>xmm1/m32 {k1}{z},xmm2</args>
			<opc openc="QVM">EVEX.128.F3.0F38.W0 14 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 2 packed unsigned quad-word integers from xmm2 into 2 packed unsigned word integers in xmm1/m32 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQW</mnem>
			<args>xmm1/m64 {k1}{z},ymm2</args>
			<opc openc="QVM">EVEX.256.F3.0F38.W0 34 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed quad-word integers from ymm2 into 4 packed word integers in xmm1/m64 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSQW</mnem>
			<args>xmm1/m64 {k1}{z},ymm2</args>
			<opc openc="QVM">EVEX.256.F3.0F38.W0 24 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed signed quad-word integers from ymm2 into 4 packed signed word integers in xmm1/m64 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSQW</mnem>
			<args>xmm1/m64 {k1}{z},ymm2</args>
			<opc openc="QVM">EVEX.256.F3.0F38.W0 14 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed unsigned quad-word integers from ymm2 into 4 packed unsigned word integers in xmm1/m64 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQW</mnem>
			<args>xmm1/m128 {k1}{z},zmm2</args>
			<opc openc="QVM">EVEX.512.F3.0F38.W0 34 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed quad-word integers from zmm2 into 8 packed word integers in xmm1/m128 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSQW</mnem>
			<args>xmm1/m128 {k1}{z},zmm2</args>
			<opc openc="QVM">EVEX.512.F3.0F38.W0 24 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed word integers in xmm1/m128 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSQW</mnem>
			<args>xmm1/m128 {k1}{z},zmm2</args>
			<opc openc="QVM">EVEX.512.F3.0F38.W0 14 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed unsigned quad-word integers from zmm2 into 8 packed unsigned word integers in xmm1/m128 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<!-- QVM: the destination is in ModRM:r/m (write only, register or memory); the source vector is in ModRM:reg (read). -->
		<oprndenc openc="QVM">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  Qword to dword down-conversions, grouped by source vector length (xmm2, ymm2, zmm2).
	  Opcode byte 35 is the truncating VPMOVQD, 25 is the signed-saturating VPMOVSQD and
	  15 is the unsigned-saturating VPMOVUSQD.
	  The 128/256-bit forms list AVX512VL together with AVX512F; the 512-bit forms list AVX512F only.
	  Every form references the HVM operand encoding, which is the only encoding this group defines.
	  NOTE(review): the 128-bit VPMOVQD form lists xmm1/m128 while its 128-bit siblings
	  VPMOVSQD and VPMOVUSQD list xmm1/m64. It is kept as transcribed; confirm against the
	  Intel reference before relying on the memory operand size.
	 -->
	<common>
		<brief>VPMOVQD/VPMOVSQD/VPMOVUSQD--Down Convert QWord to DWord.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQD</mnem>
			<args>xmm1/m128 {k1}{z},xmm2</args>
			<opc openc="HVM">EVEX.128.F3.0F38.W0 35 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 2 packed quad-word integers from xmm2 into 2 packed double-word integers in xmm1/m128 with truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSQD</mnem>
			<args>xmm1/m64 {k1}{z},xmm2</args>
			<opc openc="HVM">EVEX.128.F3.0F38.W0 25 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 2 packed signed quad-word integers from xmm2 into 2 packed signed double-word integers in xmm1/m64 using signed saturation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSQD</mnem>
			<args>xmm1/m64 {k1}{z},xmm2</args>
			<opc openc="HVM">EVEX.128.F3.0F38.W0 15 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 2 packed unsigned quad-word integers from xmm2 into 2 packed unsigned double-word integers in xmm1/m64 using unsigned saturation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQD</mnem>
			<args>xmm1/m128 {k1}{z},ymm2</args>
			<opc openc="HVM">EVEX.256.F3.0F38.W0 35 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed quad-word integers from ymm2 into 4 packed double-word integers in xmm1/m128 with truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSQD</mnem>
			<args>xmm1/m128 {k1}{z},ymm2</args>
			<opc openc="HVM">EVEX.256.F3.0F38.W0 25 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed signed quad-word integers from ymm2 into 4 packed signed double-word integers in xmm1/m128 using signed saturation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSQD</mnem>
			<args>xmm1/m128 {k1}{z},ymm2</args>
			<opc openc="HVM">EVEX.256.F3.0F38.W0 15 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed unsigned quad-word integers from ymm2 into 4 packed unsigned double-word integers in xmm1/m128 using unsigned saturation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVQD</mnem>
			<args>ymm1/m256 {k1}{z},zmm2</args>
			<opc openc="HVM">EVEX.512.F3.0F38.W0 35 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed quad-word integers from zmm2 into 8 packed double-word integers in ymm1/m256 with truncation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSQD</mnem>
			<args>ymm1/m256 {k1}{z},zmm2</args>
			<opc openc="HVM">EVEX.512.F3.0F38.W0 25 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed signed quad-word integers from zmm2 into 8 packed signed double-word integers in ymm1/m256 using signed saturation subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSQD</mnem>
			<args>ymm1/m256 {k1}{z},zmm2</args>
			<opc openc="HVM">EVEX.512.F3.0F38.W0 15 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed unsigned quad-word integers from zmm2 into 8 packed unsigned double-word integers in ymm1/m256 using unsigned saturation subject to writemask k1.</dscrp>
		</ins>
		<!-- HVM: the destination is in ModRM:r/m (write only, register or memory); the source vector is in ModRM:reg (read). -->
		<oprndenc openc="HVM">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  Dword to byte down-conversions, grouped by source vector length (xmm2, ymm2, zmm2).
	  Opcode byte 31 is the truncating VPMOVDB, 21 is the signed-saturating VPMOVSDB and
	  11 is the unsigned-saturating VPMOVUSDB.
	  The memory destination is one quarter of the source width: m32, m64 and m128 respectively,
	  so the element counts in the descriptions are 4, 8 and 16.
	  The 128/256-bit forms list AVX512VL together with AVX512F; the 512-bit forms list AVX512F only.
	  Every form uses the QVM operand encoding defined at the end of the group.
	 -->
	<common>
		<brief>VPMOVDB/VPMOVSDB/VPMOVUSDB--Down Convert DWord to Byte.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVDB</mnem>
			<args>xmm1/m32 {k1}{z},xmm2</args>
			<opc openc="QVM">EVEX.128.F3.0F38.W0 31 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed double-word integers from xmm2 into 4 packed byte integers in xmm1/m32 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSDB</mnem>
			<args>xmm1/m32 {k1}{z},xmm2</args>
			<opc openc="QVM">EVEX.128.F3.0F38.W0 21 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed signed double-word integers from xmm2 into 4 packed signed byte integers in xmm1/m32 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSDB</mnem>
			<args>xmm1/m32 {k1}{z},xmm2</args>
			<opc openc="QVM">EVEX.128.F3.0F38.W0 11 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed unsigned double-word integers from xmm2 into 4 packed unsigned byte integers in xmm1/m32 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVDB</mnem>
			<args>xmm1/m64 {k1}{z},ymm2</args>
			<opc openc="QVM">EVEX.256.F3.0F38.W0 31 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed double-word integers from ymm2 into 8 packed byte integers in xmm1/m64 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSDB</mnem>
			<args>xmm1/m64 {k1}{z},ymm2</args>
			<opc openc="QVM">EVEX.256.F3.0F38.W0 21 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed signed double-word integers from ymm2 into 8 packed signed byte integers in xmm1/m64 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSDB</mnem>
			<args>xmm1/m64 {k1}{z},ymm2</args>
			<opc openc="QVM">EVEX.256.F3.0F38.W0 11 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed unsigned double-word integers from ymm2 into 8 packed unsigned byte integers in xmm1/m64 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVDB</mnem>
			<args>xmm1/m128 {k1}{z},zmm2</args>
			<opc openc="QVM">EVEX.512.F3.0F38.W0 31 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 16 packed double-word integers from zmm2 into 16 packed byte integers in xmm1/m128 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSDB</mnem>
			<args>xmm1/m128 {k1}{z},zmm2</args>
			<opc openc="QVM">EVEX.512.F3.0F38.W0 21 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 16 packed signed double-word integers from zmm2 into 16 packed signed byte integers in xmm1/m128 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSDB</mnem>
			<args>xmm1/m128 {k1}{z},zmm2</args>
			<opc openc="QVM">EVEX.512.F3.0F38.W0 11 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 16 packed unsigned double-word integers from zmm2 into 16 packed unsigned byte integers in xmm1/m128 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<!-- QVM: the destination is in ModRM:r/m (write only, register or memory); the source vector is in ModRM:reg (read). -->
		<oprndenc openc="QVM">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  Dword to word down-conversions, grouped by source vector length (xmm2, ymm2, zmm2).
	  Opcode byte 33 is the truncating VPMOVDW, 23 is the signed-saturating VPMOVSDW and
	  13 is the unsigned-saturating VPMOVUSDW.
	  The destination is half of the source width: xmm1/m64, xmm1/m128 and ymm1/m256 respectively,
	  so the element counts in the descriptions are 4, 8 and 16.
	  The 128/256-bit forms list AVX512VL together with AVX512F; the 512-bit forms list AVX512F only.
	  Every form uses the HVM operand encoding defined at the end of the group.
	 -->
	<common>
		<brief>VPMOVDW/VPMOVSDW/VPMOVUSDW--Down Convert DWord to Word.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVDW</mnem>
			<args>xmm1/m64 {k1}{z},xmm2</args>
			<opc openc="HVM">EVEX.128.F3.0F38.W0 33 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed double-word integers from xmm2 into 4 packed word integers in xmm1/m64 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSDW</mnem>
			<args>xmm1/m64 {k1}{z},xmm2</args>
			<opc openc="HVM">EVEX.128.F3.0F38.W0 23 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed signed double-word integers from xmm2 into 4 packed signed word integers in xmm1/m64 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSDW</mnem>
			<args>xmm1/m64 {k1}{z},xmm2</args>
			<opc openc="HVM">EVEX.128.F3.0F38.W0 13 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 4 packed unsigned double-word integers from xmm2 into 4 packed unsigned word integers in xmm1/m64 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVDW</mnem>
			<args>xmm1/m128 {k1}{z},ymm2</args>
			<opc openc="HVM">EVEX.256.F3.0F38.W0 33 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed double-word integers from ymm2 into 8 packed word integers in xmm1/m128 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSDW</mnem>
			<args>xmm1/m128 {k1}{z},ymm2</args>
			<opc openc="HVM">EVEX.256.F3.0F38.W0 23 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed signed double-word integers from ymm2 into 8 packed signed word integers in xmm1/m128 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSDW</mnem>
			<args>xmm1/m128 {k1}{z},ymm2</args>
			<opc openc="HVM">EVEX.256.F3.0F38.W0 13 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 8 packed unsigned double-word integers from ymm2 into 8 packed unsigned word integers in xmm1/m128 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVDW</mnem>
			<args>ymm1/m256 {k1}{z},zmm2</args>
			<opc openc="HVM">EVEX.512.F3.0F38.W0 33 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 16 packed double-word integers from zmm2 into 16 packed word integers in ymm1/m256 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSDW</mnem>
			<args>ymm1/m256 {k1}{z},zmm2</args>
			<opc openc="HVM">EVEX.512.F3.0F38.W0 23 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 16 packed signed double-word integers from zmm2 into 16 packed signed word integers in ymm1/m256 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSDW</mnem>
			<args>ymm1/m256 {k1}{z},zmm2</args>
			<opc openc="HVM">EVEX.512.F3.0F38.W0 13 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Converts 16 packed unsigned double-word integers from zmm2 into 16 packed unsigned word integers in ymm1/m256 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<!-- HVM: the destination is in ModRM:r/m (write only, register or memory); the source vector is in ModRM:reg (read). -->
		<oprndenc openc="HVM">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  Word to byte down-conversions, grouped by source vector length (xmm2, ymm2, zmm2).
	  Opcode byte 30 is the truncating VPMOVWB, 20 is the signed-saturating VPMOVSWB and
	  10 is the unsigned-saturating VPMOVUSWB.
	  The destination is half of the source width: xmm1/m64, xmm1/m128 and ymm1/m256 respectively,
	  so the element counts in the descriptions are 8, 16 and 32.
	  The 128/256-bit forms list AVX512VL together with AVX512BW; the 512-bit forms list AVX512BW only.
	  Every form uses the HVM operand encoding defined at the end of the group.
	 -->
	<common>
		<brief>VPMOVWB/VPMOVSWB/VPMOVUSWB--Down Convert Word to Byte.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVWB</mnem>
			<args>xmm1/m64 {k1}{z},xmm2</args>
			<opc openc="HVM">EVEX.128.F3.0F38.W0 30 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts 8 packed word integers from xmm2 into 8 packed bytes in xmm1/m64 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSWB</mnem>
			<args>xmm1/m64 {k1}{z},xmm2</args>
			<opc openc="HVM">EVEX.128.F3.0F38.W0 20 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts 8 packed signed word integers from xmm2 into 8 packed signed bytes in xmm1/m64 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSWB</mnem>
			<args>xmm1/m64 {k1}{z},xmm2</args>
			<opc openc="HVM">EVEX.128.F3.0F38.W0 10 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts 8 packed unsigned word integers from xmm2 into 8 packed unsigned bytes in xmm1/m64 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVWB</mnem>
			<args>xmm1/m128 {k1}{z},ymm2</args>
			<opc openc="HVM">EVEX.256.F3.0F38.W0 30 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts 16 packed word integers from ymm2 into 16 packed bytes in xmm1/m128 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSWB</mnem>
			<args>xmm1/m128 {k1}{z},ymm2</args>
			<opc openc="HVM">EVEX.256.F3.0F38.W0 20 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts 16 packed signed word integers from ymm2 into 16 packed signed bytes in xmm1/m128 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSWB</mnem>
			<args>xmm1/m128 {k1}{z},ymm2</args>
			<opc openc="HVM">EVEX.256.F3.0F38.W0 10 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts 16 packed unsigned word integers from ymm2 into 16 packed unsigned bytes in xmm1/m128 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVWB</mnem>
			<args>ymm1/m256 {k1}{z},zmm2</args>
			<opc openc="HVM">EVEX.512.F3.0F38.W0 30 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts 32 packed word integers from zmm2 into 32 packed bytes in ymm1/m256 with truncation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSWB</mnem>
			<args>ymm1/m256 {k1}{z},zmm2</args>
			<opc openc="HVM">EVEX.512.F3.0F38.W0 20 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts 32 packed signed word integers from zmm2 into 32 packed signed bytes in ymm1/m256 using signed saturation under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVUSWB</mnem>
			<args>ymm1/m256 {k1}{z},zmm2</args>
			<opc openc="HVM">EVEX.512.F3.0F38.W0 10 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Converts 32 packed unsigned word integers from zmm2 into 32 packed unsigned bytes in ymm1/m256 using unsigned saturation under writemask k1.</dscrp>
		</ins>
		<!-- HVM: the destination is in ModRM:r/m (write only, register or memory); the source vector is in ModRM:reg (read). -->
		<oprndenc openc="HVM">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PMOVSX--Packed Move with Sign Extend.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMOVSXBW</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">66 0f 38 20 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMOVSXBD</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">66 0f 38 21 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMOVSXBQ</mnem>
			<args>xmm1,xmm2/m16</args>
			<opc openc="RM">66 0f 38 22 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Sign extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMOVSXWD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">66 0f 38 23 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMOVSXWQ</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">66 0f 38 24 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Sign extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMOVSXDQ</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">66 0f 38 25 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBW</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 20 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBD</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 21 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBQ</mnem>
			<args>xmm1,xmm2/m16</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 22 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Sign extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXWD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 23 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXWQ</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 24 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Sign extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXDQ</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 25 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBW</mnem>
			<args>ymm1,xmm2/m128</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 20 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Sign extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBD</mnem>
			<args>ymm1,xmm2/m64</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 21 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBQ</mnem>
			<args>ymm1,xmm2/m32</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 22 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXWD</mnem>
			<args>ymm1,xmm2/m128</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 23 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Sign extend 8 packed 16-bit integers in the low 16 bytes of xmm2/m128 to 8 packed 32-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXWQ</mnem>
			<args>ymm1,xmm2/m64</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 24 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXDQ</mnem>
			<args>ymm1,xmm2/m128</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 25 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 32-bit integers in the low 16 bytes of xmm2/m128 to 4 packed 64-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBW</mnem>
			<args>xmm1 {k1}{z},xmm2/m64</args>
			<opc openc="HVM">EVEX.128.66.0F38.WIG 20 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sign extend 8 packed 8-bit integers in xmm2/m64 to 8 packed 16-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBW</mnem>
			<args>ymm1 {k1}{z},xmm2/m128</args>
			<opc openc="HVM">EVEX.256.66.0F38.WIG 20 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sign extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBW</mnem>
			<args>zmm1 {k1}{z},ymm2/m256</args>
			<opc openc="HVM">EVEX.512.66.0F38.WIG 20 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Sign extend 32 packed 8-bit integers in ymm2/m256 to 32 packed 16-bit integers in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBD</mnem>
			<args>xmm1 {k1}{z},xmm2/m32</args>
			<opc openc="QVM">EVEX.128.66.0F38.WIG 21 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBD</mnem>
			<args>ymm1 {k1}{z},xmm2/m64</args>
			<opc openc="QVM">EVEX.256.66.0F38.WIG 21 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBD</mnem>
			<args>zmm1 {k1}{z},xmm2/m128</args>
			<opc openc="QVM">EVEX.512.66.0F38.WIG 21 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 16 packed 8-bit integers in the low 16 bytes of xmm2/m128 to 16 packed 32-bit integers in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m16</args>
			<opc openc="OVM">EVEX.128.66.0F38.WIG 22 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m32</args>
			<opc openc="OVM">EVEX.256.66.0F38.WIG 22 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXBQ</mnem>
			<args>zmm1 {k1}{z},xmm2/m64</args>
			<opc openc="OVM">EVEX.512.66.0F38.WIG 22 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 64-bit integers in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXWD</mnem>
			<args>xmm1 {k1}{z},xmm2/m64</args>
			<opc openc="HVM">EVEX.128.66.0F38.WIG 23 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXWD</mnem>
			<args>ymm1 {k1}{z},xmm2/m128</args>
			<opc openc="HVM">EVEX.256.66.0F38.WIG 23 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 8 packed 16-bit integers in the low 16 bytes of xmm2/m128 to 8 packed 32-bit integers in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXWD</mnem>
			<args>zmm1 {k1}{z},ymm2/m256</args>
			<opc openc="HVM">EVEX.512.66.0F38.WIG 23 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 16 packed 16-bit integers in the low 32 bytes of ymm2/m256 to 16 packed 32-bit integers in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXWQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m32</args>
			<opc openc="QVM">EVEX.128.66.0F38.WIG 24 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXWQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m64</args>
			<opc openc="QVM">EVEX.256.66.0F38.WIG 24 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXWQ</mnem>
			<args>zmm1 {k1}{z},xmm2/m128</args>
			<opc openc="QVM">EVEX.512.66.0F38.WIG 24 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 8 packed 16-bit integers in the low 16 bytes of xmm2/m128 to 8 packed 64-bit integers in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXDQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m64</args>
			<opc openc="HVM">EVEX.128.66.0F38.W0 25 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXDQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m128</args>
			<opc openc="HVM">EVEX.256.66.0F38.W0 25 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 4 packed 32-bit integers in the low 16 bytes of xmm2/m128 to 4 packed 64-bit integers in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVSXDQ</mnem>
			<args>zmm1 {k1}{z},ymm2/m256</args>
			<opc openc="HVM">EVEX.512.66.0F38.W0 25 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Sign extend 8 packed 32-bit integers in the low 32 bytes of ymm2/m256 to 8 packed 64-bit integers in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="HVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="QVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="OVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PMOVZX--Packed Move with Zero Extend.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMOVZXBW</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">66 0f 38 30 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMOVZXBD</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">66 0f 38 31 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMOVZXBQ</mnem>
			<args>xmm1,xmm2/m16</args>
			<opc openc="RM">66 0f 38 32 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMOVZXWD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">66 0f 38 33 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMOVZXWQ</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">66 0f 38 34 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PMOVZXDQ</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">66 0f 38 35 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBW</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 30 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBD</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 31 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBQ</mnem>
			<args>xmm1,xmm2/m16</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 32 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXWD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 33 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXWQ</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 34 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXDQ</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.66.0F38.WIG 35 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBW</mnem>
			<args>ymm1,xmm2/m128</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 30 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Zero extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBD</mnem>
			<args>ymm1,xmm2/m64</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 31 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBQ</mnem>
			<args>ymm1,xmm2/m32</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 32 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXWD</mnem>
			<args>ymm1,xmm2/m128</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 33 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Zero extend 8 packed 16-bit integers in xmm2/m128 to 8 packed 32-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXWQ</mnem>
			<args>ymm1,xmm2/m64</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 34 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXDQ</mnem>
			<args>ymm1,xmm2/m128</args>
			<opc openc="RM">VEX.256.66.0F38.WIG 35 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 32-bit integers in xmm2/m128 to 4 packed 64-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBW</mnem>
			<args>xmm1 {k1}{z},xmm2/m64</args>
			<opc openc="HVM">EVEX.128.66.0F38.WIG 30 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit integers in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBW</mnem>
			<args>ymm1 {k1}{z},xmm2/m128</args>
			<opc openc="HVM">EVEX.256.66.0F38.WIG 30 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Zero extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 16-bit integers in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBW</mnem>
			<args>zmm1 {k1}{z},ymm2/m256</args>
			<opc openc="HVM">EVEX.512.66.0F38.WIG 30 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Zero extend 32 packed 8-bit integers in ymm2/m256 to 32 packed 16-bit integers in zmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBD</mnem>
			<args>xmm1 {k1}{z},xmm2/m32</args>
			<opc openc="QVM">EVEX.128.66.0F38.WIG 31 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 32-bit integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBD</mnem>
			<args>ymm1 {k1}{z},xmm2/m64</args>
			<opc openc="QVM">EVEX.256.66.0F38.WIG 31 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 32-bit integers in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBD</mnem>
			<args>zmm1 {k1}{z},xmm2/m128</args>
			<opc openc="QVM">EVEX.512.66.0F38.WIG 31 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 16 packed 8-bit integers in xmm2/m128 to 16 packed 32-bit integers in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m16</args>
			<opc openc="OVM">EVEX.128.66.0F38.WIG 32 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 2 packed 8-bit integers in the low 2 bytes of xmm2/m16 to 2 packed 64-bit integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m32</args>
			<opc openc="OVM">EVEX.256.66.0F38.WIG 32 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 8-bit integers in the low 4 bytes of xmm2/m32 to 4 packed 64-bit integers in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXBQ</mnem>
			<args>zmm1 {k1}{z},xmm2/m64</args>
			<opc openc="OVM">EVEX.512.66.0F38.WIG 32 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 64-bit integers in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXWD</mnem>
			<args>xmm1 {k1}{z},xmm2/m64</args>
			<opc openc="HVM">EVEX.128.66.0F38.WIG 33 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXWD</mnem>
			<args>ymm1 {k1}{z},xmm2/m128</args>
			<opc openc="HVM">EVEX.256.66.0F38.WIG 33 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 8 packed 16-bit integers in xmm2/m128 to 8 packed 32-bit integers in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXWD</mnem>
			<args>zmm1 {k1}{z},ymm2/m256</args>
			<opc openc="HVM">EVEX.512.66.0F38.WIG 33 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 16 packed 16-bit integers in ymm2/m256 to 16 packed 32-bit integers in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXWQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m32</args>
			<opc openc="QVM">EVEX.128.66.0F38.WIG 34 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 2 packed 16-bit integers in the low 4 bytes of xmm2/m32 to 2 packed 64-bit integers in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXWQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m64</args>
			<opc openc="QVM">EVEX.256.66.0F38.WIG 34 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 64-bit integers in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXWQ</mnem>
			<args>zmm1 {k1}{z},xmm2/m128</args>
			<opc openc="QVM">EVEX.512.66.0F38.WIG 34 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 8 packed 16-bit integers in xmm2/m128 to 8 packed 64-bit integers in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXDQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m64</args>
			<opc openc="HVM">EVEX.128.66.0F38.W0 35 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit integers in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXDQ</mnem>
			<args>ymm1 {k1}{z},xmm2/m128</args>
			<opc openc="HVM">EVEX.256.66.0F38.W0 35 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 4 packed 32-bit integers in xmm2/m128 to 4 packed 64-bit integers in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMOVZXDQ</mnem>
			<args>zmm1 {k1}{z},ymm2/m256</args>
			<opc openc="HVM">EVEX.512.66.0F38.W0 35 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Zero extend 8 packed 32-bit integers in ymm2/m256 to 8 packed 64-bit integers in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="HVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="QVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="OVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PMULDQ--Multiply Packed Doubleword Integers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMULDQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 28 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Multiply packed signed doubleword integers in xmm1 by packed signed doubleword integers in xmm2/m128, and store the quadword results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULDQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.WIG 28 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply packed signed doubleword integers in xmm2 by packed signed doubleword integers in xmm3/m128, and store the quadword results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULDQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.WIG 28 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Multiply packed signed doubleword integers in ymm2 by packed signed doubleword integers in ymm3/m256, and store the quadword results in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULDQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 28 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed signed doubleword integers in xmm2 by packed signed doubleword integers in xmm3/m128/m64bcst, and store the quadword results in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULDQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 28 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed signed doubleword integers in ymm2 by packed signed doubleword integers in ymm3/m256/m64bcst, and store the quadword results in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULDQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 28 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed signed doubleword integers in zmm2 by packed signed doubleword integers in zmm3/m512/m64bcst, and store the quadword results in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PMULHRSW--Multiply Packed Signed Integers with Round and Scale.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMULHRSW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 0B /r</opc>
			<cpuid>
				<flag>SSSE3</flag>
			</cpuid>
			<dscrp>Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHRSW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38 0B /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHRSW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38 0B /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHRSW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.WIG 0B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHRSW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.WIG 0B /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHRSW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.WIG 0B /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply 16-bit signed words, scale and round signed doublewords, pack high 16 bits to zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PMULHUW--Multiply Packed Unsigned Integers and Store High Result.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMULHUW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F E4 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Multiply the packed unsigned word integers in xmm1 and xmm2/m128, and store the high 16 bits of the results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHUW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F E4 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply the packed unsigned word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHUW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F E4 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Multiply the packed unsigned word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHUW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG E4 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed unsigned word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHUW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG E4 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed unsigned word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHUW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG E4 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed unsigned word integers in zmm2 and zmm3/m512, and store the high 16 bits of the results in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PMULHW--Multiply Packed Integers and Store High Result.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMULHW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F E5 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the high 16 bits of the results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F E5 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F E5 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG E5 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the high 16 bits of the results in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG E5 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the high 16 bits of the results in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULHW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG E5 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in zmm2 and zmm3/m512, and store the high 16 bits of the results in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PMULLD/PMULLQ--Multiply Packed Integers and Store Low Result.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMULLD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 40 /r</opc>
			<cpuid>
				<flag>SSE4_1</flag>
			</cpuid>
			<dscrp>Multiply the packed dword signed integers in xmm1 and xmm2/m128 and store the low 32 bits of each product in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.WIG 40 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply the packed dword signed integers in xmm2 and xmm3/m128 and store the low 32 bits of each product in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.WIG 40 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Multiply the packed dword signed integers in ymm2 and ymm3/m256 and store the low 32 bits of each product in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 40 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply the packed dword signed integers in xmm2 and xmm3/m128/m32bcst and store the low 32 bits of each product in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 40 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply the packed dword signed integers in ymm2 and ymm3/m256/m32bcst and store the low 32 bits of each product in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 40 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply the packed dword signed integers in zmm2 and zmm3/m512/m32bcst and store the low 32 bits of each product in zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 40 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Multiply the packed qword signed integers in xmm2 and xmm3/m128/m64bcst and store the low 64 bits of each product in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 40 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Multiply the packed qword signed integers in ymm2 and ymm3/m256/m64bcst and store the low 64 bits of each product in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 40 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Multiply the packed qword signed integers in zmm2 and zmm3/m512/m64bcst and store the low 64 bits of each product in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PMULLW--Multiply Packed Integers and Store Low Result.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PMULLW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F D5 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of the results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F D5 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the low 16 bits of the results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F D5 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the low 16 bits of the results in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG D5 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in xmm2 and xmm3/m128, and store the low 16 bits of the results in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG D5 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in ymm2 and ymm3/m256, and store the low 16 bits of the results in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULLW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG D5 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Multiply the packed signed word integers in zmm2 and zmm3/m512, and store the low 16 bits of the results in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VPMULTISHIFTQB--Select Packed Unaligned Bytes from Quadword Sources.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPMULTISHIFTQB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 83 /r</opc>
			<cpuid>
				<flag>AVX512VBMI</flag>
				<flag>AVX512VL</flag>
			</cpuid>
			<dscrp>Select unaligned bytes from qwords in xmm3/m128/m64bcst using control bytes in xmm2, write byte results to xmm1 under k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULTISHIFTQB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 83 /r</opc>
			<cpuid>
				<flag>AVX512VBMI</flag>
				<flag>AVX512VL</flag>
			</cpuid>
			<dscrp>Select unaligned bytes from qwords in ymm3/m256/m64bcst using control bytes in ymm2, write byte results to ymm1 under k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULTISHIFTQB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 83 /r</opc>
			<cpuid>
				<flag>AVX512VBMI</flag>
			</cpuid>
			<dscrp>Select unaligned bytes from qwords in zmm3/m512/m64bcst using control bytes in zmm2, write byte results to zmm1 under k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PMULUDQ--Multiply Packed Unsigned Doubleword Integers.</brief>
		<ins x32m="V" x64m="V">
			<!-- Legacy SSE form (66 0F F4 /r). PMULUDQ on XMM registers is an SSE2
			     instruction; SSE4.1 introduced the signed variant PMULDQ, not this one. -->
			<mnem>PMULUDQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F F4 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers in xmm2/m128, and store the quadword results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULUDQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG F4 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Multiply packed unsigned doubleword integers in xmm2 by packed unsigned doubleword integers in xmm3/m128, and store the quadword results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULUDQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG F4 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Multiply packed unsigned doubleword integers in ymm2 by packed unsigned doubleword integers in ymm3/m256, and store the quadword results in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULUDQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 F4 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed unsigned doubleword integers in xmm2 by packed unsigned doubleword integers in xmm3/m128/m64bcst, and store the quadword results in xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULUDQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 F4 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed unsigned doubleword integers in ymm2 by packed unsigned doubleword integers in ymm3/m256/m64bcst, and store the quadword results in ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPMULUDQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 F4 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Multiply packed unsigned doubleword integers in zmm2 by packed unsigned doubleword integers in zmm3/m512/m64bcst, and store the quadword results in zmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>POR--Bitwise Logical Or.</brief>
		<ins x32m="V" x64m="V">
			<mnem>POR</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F EB /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Bitwise OR of xmm2/m128 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPOR</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG EB /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Bitwise OR of xmm2 and xmm3/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPOR</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG EB /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Bitwise OR of ymm2 and ymm3/m256.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPORD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 EB /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise OR of packed doubleword integers in xmm2 and xmm3/m128/m32bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPORD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 EB /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise OR of packed doubleword integers in ymm2 and ymm3/m256/m32bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPORD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 EB /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise OR of packed doubleword integers in zmm2 and zmm3/m512/m32bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPORQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 EB /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise OR of packed quadword integers in xmm2 and xmm3/m128/m64bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPORQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 EB /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise OR of packed quadword integers in ymm2 and ymm3/m256/m64bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPORQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 EB /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise OR of packed quadword integers in zmm2 and zmm3/m512/m64bcst using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  EVEX rotate-left group. Two operand layouts are used:
		    FV-RVM : variable-count forms (VPROLVD, VPROLVQ) with three operands
		             dest, src, count-vector/memory.
		    FV-VMI : immediate-count forms (VPROLD, VPROLQ) with operands
		             dest, src/memory, imm8; the destination is carried in EVEX.vvvv
		             (the opcodes are EVEX.NDD with a /1 opcode extension in ModRM:reg).
		  Each opc/@openc below names the oprndenc row of this group that applies to it,
		  so a lookup by openc key resolves to exactly one row.
		-->
		<brief>PROLD/PROLVD/PROLQ/PROLVQ--Bit Rotate Left.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPROLVD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.128.66.0F38.W0 15 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate doublewords in xmm2 left by count in the corresponding element of xmm3/m128/m32bcst. Result written to xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.128.66.0F.W0 72 /1 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate doublewords in xmm2/m128/m32bcst left by imm8. Result written to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLVQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.128.66.0F38.W1 15 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in xmm2 left by count in the corresponding element of xmm3/m128/m64bcst. Result written to xmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.128.66.0F.W1 72 /1 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in xmm2/m128/m64bcst left by imm8. Result written to xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLVD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.256.66.0F38.W0 15 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate doublewords in ymm2 left by count in the corresponding element of ymm3/m256/m32bcst. Result written to ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.256.66.0F.W0 72 /1 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate doublewords in ymm2/m256/m32bcst left by imm8. Result written to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLVQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.256.66.0F38.W1 15 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in ymm2 left by count in the corresponding element of ymm3/m256/m64bcst. Result written to ymm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.256.66.0F.W1 72 /1 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in ymm2/m256/m64bcst left by imm8. Result written to ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLVD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.512.66.0F38.W0 15 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate left of doublewords in zmm2 by count in the corresponding element of zmm3/m512/m32bcst. Result written to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.512.66.0F.W0 72 /1 ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate left of doublewords in zmm2/m512/m32bcst by imm8. Result written to zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLVQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.512.66.0F38.W1 15 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in zmm2 left by count in the corresponding element of zmm3/m512/m64bcst. Result written to zmm1 under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPROLQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.512.66.0F.W1 72 /1 ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in zmm2/m512/m64bcst left by imm8. Result written to zmm1 using writemask k1.</dscrp>
		</ins>
		<!-- Immediate-count layout: destination in EVEX.vvvv, source in ModRM:r/m, count in imm8. -->
		<oprndenc openc="FV-VMI">
			<oprnd1>EVEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- Variable-count layout: destination in ModRM:reg, source in EVEX.vvvv, counts in ModRM:r/m. -->
		<oprndenc openc="FV-RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!--
		  EVEX rotate-right group. Two operand layouts are used:
		    FV-RVM : variable-count forms (VPRORVD, VPRORVQ) with three operands
		             dest, src, count-vector/memory.
		    FV-VMI : immediate-count forms (VPRORD, VPRORQ) with operands
		             dest, src/memory, imm8; the destination is carried in EVEX.vvvv
		             (the opcodes are EVEX.NDD with a /0 opcode extension in ModRM:reg).
		  Each opc/@openc below names the oprndenc row of this group that applies to it,
		  so a lookup by openc key resolves to exactly one row.
		-->
		<brief>PRORD/PRORVD/PRORQ/PRORVQ--Bit Rotate Right.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPRORVD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.128.66.0F38.W0 14 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate doublewords in xmm2 right by count in the corresponding element of xmm3/m128/m32bcst, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.128.66.0F.W0 72 /0 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate doublewords in xmm2/m128/m32bcst right by imm8, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORVQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.128.66.0F38.W1 14 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in xmm2 right by count in the corresponding element of xmm3/m128/m64bcst, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.128.66.0F.W1 72 /0 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in xmm2/m128/m64bcst right by imm8, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORVD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.256.66.0F38.W0 14 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate doublewords in ymm2 right by count in the corresponding element of ymm3/m256/m32bcst, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.256.66.0F.W0 72 /0 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate doublewords in ymm2/m256/m32bcst right by imm8, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORVQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.256.66.0F38.W1 14 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in ymm2 right by count in the corresponding element of ymm3/m256/m64bcst, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.256.66.0F.W1 72 /0 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in ymm2/m256/m64bcst right by imm8, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORVD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.512.66.0F38.W0 14 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate doublewords in zmm2 right by count in the corresponding element of zmm3/m512/m32bcst, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.512.66.0F.W0 72 /0 ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate doublewords in zmm2/m512/m32bcst right by imm8, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORVQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV-RVM">EVEX.NDS.512.66.0F38.W1 14 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in zmm2 right by count in the corresponding element of zmm3/m512/m64bcst, store result using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPRORQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst,imm8</args>
			<opc openc="FV-VMI">EVEX.NDD.512.66.0F.W1 72 /0 ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rotate quadwords in zmm2/m512/m64bcst right by imm8, store result using writemask k1.</dscrp>
		</ins>
		<!-- Immediate-count layout: destination in EVEX.vvvv, source in ModRM:r/m, count in imm8. -->
		<oprndenc openc="FV-VMI">
			<oprnd1>EVEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- Variable-count layout: destination in ModRM:reg, source in EVEX.vvvv, counts in ModRM:r/m. -->
		<oprndenc openc="FV-RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VPSCATTERDD/VPSCATTERDQ/VPSCATTERQD/VPSCATTERQQ--Scatter Packed Dword, Packed Qword with Signed Dword, Signed Qword Indices.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERDD</mnem>
			<args>vm32x {k1},xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 A0 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter dword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERDD</mnem>
			<args>vm32y {k1},ymm1</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 A0 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter dword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERDD</mnem>
			<args>vm32z {k1},zmm1</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 A0 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter dword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERDQ</mnem>
			<args>vm32x {k1},xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 A0 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter qword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERDQ</mnem>
			<args>vm32x {k1},ymm1</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 A0 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter qword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERDQ</mnem>
			<args>vm32y {k1},zmm1</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 A0 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter qword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERQD</mnem>
			<args>vm64x {k1},xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 A1 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter dword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERQD</mnem>
			<args>vm64y {k1},xmm1</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 A1 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter dword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERQD</mnem>
			<args>vm64z {k1},ymm1</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 A1 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter dword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERQQ</mnem>
			<args>vm64x {k1},xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 A1 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter qword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERQQ</mnem>
			<args>vm64y {k1},ymm1</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 A1 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter qword values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSCATTERQQ</mnem>
			<args>vm64z {k1},zmm1</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 A1 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter qword values to memory using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>BaseReg(R): VSIB:base,VectorReg(R): VSIB:index</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- PSHUFB / VPSHUFB encodings: legacy SSSE3, VEX (AVX for 128-bit, AVX2 for 256-bit) and
		     EVEX (AVX512BW, with AVX512VL additionally flagged on the 128/256-bit forms).
		     The openc attribute of each <opc> names one of the <oprndenc> tables at the end of this group. -->
		<brief>PSHUFB--Packed Shuffle Bytes.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSHUFB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 38 00 /r</opc>
			<cpuid>
				<flag>SSSE3</flag>
			</cpuid>
			<dscrp>Shuffle bytes in xmm1 according to contents of xmm2/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38 00 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shuffle bytes in xmm2 according to contents of xmm3/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38 00 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shuffle bytes in ymm2 according to contents of ymm3/m256.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.WIG 00 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shuffle bytes in xmm2 according to contents of xmm3/m128 under write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.WIG 00 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shuffle bytes in ymm2 according to contents of ymm3/m256 under write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.WIG 00 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shuffle bytes in zmm2 according to contents of zmm3/m512 under write mask k1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, keyed by openc. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- RVM is referenced by the two VEX-encoded VPSHUFB entries above; without this table
		     their openc key would not resolve inside this group. -->
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- PSHUFHW / VPSHUFHW encodings: legacy SSE2, VEX (AVX for 128-bit, AVX2 for 256-bit) and
		     EVEX (AVX512BW, with AVX512VL additionally flagged on the 128/256-bit forms).
		     All forms take a register destination, a register/memory source and an imm8 selector.
		     The openc attribute of each <opc> names one of the <oprndenc> tables at the end of this group. -->
		<brief>PSHUFHW--Shuffle Packed High Words.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSHUFHW</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">F3 0F 70 /r ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFHW</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">VEX.128.F3.0F 70 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFHW</mnem>
			<args>ymm1,ymm2/m256,imm8</args>
			<opc openc="RMI">VEX.256.F3.0F 70 /r ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shuffle the high words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1.</dscrp>
		</ins>
		<!-- EVEX forms: same operands plus writemask k1 with optional zeroing ({z}). -->
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFHW</mnem>
			<args>xmm1 {k1}{z},xmm2/m128,imm8</args>
			<opc openc="FVM">EVEX.128.F3.0F.WIG 70 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1 under write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFHW</mnem>
			<args>ymm1 {k1}{z},ymm2/m256,imm8</args>
			<opc openc="FVM">EVEX.256.F3.0F.WIG 70 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shuffle the high words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1 under write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFHW</mnem>
			<args>zmm1 {k1}{z},zmm2/m512,imm8</args>
			<opc openc="FVM">EVEX.512.F3.0F.WIG 70 /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shuffle the high words in zmm2/m512 based on the encoding in imm8 and store the result in zmm1 under write mask k1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, keyed by openc. RMI and FVM list the same operand mapping here. -->
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- PSHUFLW / VPSHUFLW encodings: legacy SSE2, VEX (AVX for 128-bit, AVX2 for 256-bit) and
		     EVEX (AVX512BW, with AVX512VL additionally flagged on the 128/256-bit forms).
		     All forms take a register destination, a register/memory source and an imm8 selector.
		     The openc attribute of each <opc> names one of the <oprndenc> tables at the end of this group. -->
		<brief>PSHUFLW--Shuffle Packed Low Words.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSHUFLW</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">F2 0F 70 /r ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFLW</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">VEX.128.F2.0F 70 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFLW</mnem>
			<args>ymm1,ymm2/m256,imm8</args>
			<opc openc="RMI">VEX.256.F2.0F 70 /r ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shuffle the low words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1.</dscrp>
		</ins>
		<!-- EVEX forms: same operands plus writemask k1 with optional zeroing ({z}). -->
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFLW</mnem>
			<args>xmm1 {k1}{z},xmm2/m128,imm8</args>
			<opc openc="FVM">EVEX.128.F2.0F.WIG 70 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1 under write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFLW</mnem>
			<args>ymm1 {k1}{z},ymm2/m256,imm8</args>
			<opc openc="FVM">EVEX.256.F2.0F.WIG 70 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shuffle the low words in ymm2/m256 based on the encoding in imm8 and store the result in ymm1 under write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFLW</mnem>
			<args>zmm1 {k1}{z},zmm2/m512,imm8</args>
			<opc openc="FVM">EVEX.512.F2.0F.WIG 70 /r ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shuffle the low words in zmm2/m512 based on the encoding in imm8 and store the result in zmm1 under write mask k1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, keyed by openc. RMI and FVM list the same operand mapping here. -->
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- PSHUFD / VPSHUFD encodings: legacy SSE2, VEX (AVX for 128-bit, AVX2 for 256-bit) and
		     EVEX (AVX512F, with AVX512VL additionally flagged on the 128/256-bit forms).
		     All forms take a register destination, a register/memory source and an imm8 selector.
		     The openc attribute of each <opc> names one of the <oprndenc> tables at the end of this group. -->
		<brief>PSHUFD--Shuffle Packed Doublewords.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSHUFD</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">66 0F 70 /r ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFD</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">VEX.128.66.0F.WIG 70 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFD</mnem>
			<args>ymm1,ymm2/m256,imm8</args>
			<opc openc="RMI">VEX.256.66.0F.WIG 70 /r ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shuffle the doublewords in ymm2/m256 based on the encoding in imm8 and store the result in ymm1.</dscrp>
		</ins>
		<!-- EVEX forms: the source may also be a 32-bit broadcast memory operand (m32bcst);
		     the destination takes writemask k1 with optional zeroing ({z}). -->
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.128.66.0F.W0 70 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle the doublewords in xmm2/m128/m32bcst based on the encoding in imm8 and store the result in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.256.66.0F.W0 70 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle the doublewords in ymm2/m256/m32bcst based on the encoding in imm8 and store the result in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSHUFD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst,imm8</args>
			<opc openc="FV">EVEX.512.66.0F.W0 70 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle the doublewords in zmm2/m512/m32bcst based on the encoding in imm8 and store the result in zmm1 using writemask k1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, keyed by openc. RMI and FV list the same operand mapping here. -->
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- PSLLDQ / VPSLLDQ encodings: legacy SSE2, VEX (AVX for 128-bit, AVX2 for 256-bit) and
		     EVEX (AVX512BW, with AVX512VL additionally flagged on the 128/256-bit forms).
		     Every form uses opcode 73 with ModRM.reg = /7 and an imm8 byte count.
		     The openc attribute of each <opc> names one of the <oprndenc> tables at the end of this group. -->
		<brief>PSLLDQ--Byte Shift Left.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSLLDQ</mnem>
			<args>xmm1,imm8</args>
			<opc openc="MI">66 0F 73 /7 ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift xmm1 left by imm8 bytes while shifting in 0s and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLDQ</mnem>
			<args>xmm1,xmm2,imm8</args>
			<opc openc="VMI">VEX.NDD.128.66.0F.WIG 73 /7 ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift xmm2 left by imm8 bytes while shifting in 0s and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLDQ</mnem>
			<args>ymm1,ymm2,imm8</args>
			<opc openc="VMI">VEX.NDD.256.66.0F.WIG 73 /7 ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift ymm2 left by imm8 bytes while shifting in 0s and store result in ymm1.</dscrp>
		</ins>
		<!-- EVEX forms: the source may be a register or memory; no writemask appears in the operand lists. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSLLDQ</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="FVM">EVEX.NDD.128.66.0F.WIG 73 /7 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift xmm2/m128 left by imm8 bytes while shifting in 0s and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLDQ</mnem>
			<args>ymm1,ymm2/m256,imm8</args>
			<opc openc="FVM">EVEX.NDD.256.66.0F.WIG 73 /7 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift ymm2/m256 left by imm8 bytes while shifting in 0s and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLDQ</mnem>
			<args>zmm1,zmm2/m512,imm8</args>
			<opc openc="FVM">EVEX.NDD.512.66.0F.WIG 73 /7 ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift zmm2/m512 left by imm8 bytes while shifting in 0s and store result in zmm1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, keyed by openc. In the VEX/EVEX forms the destination is
		     carried in the vvvv field and the source in ModRM:r/m. -->
		<oprndenc openc="MI">
			<oprnd1>ModRM:r/m(r,w)</oprnd1>
			<oprnd2>Imm8</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="VMI">
			<oprnd1>VEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>EVEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- PSLLW / PSLLD / PSLLQ (and V-prefixed) encodings: legacy SSE2, VEX (AVX for 128-bit,
		     AVX2 for 256-bit) and EVEX (AVX512BW for word forms, AVX512F for dword/qword forms,
		     with AVX512VL additionally flagged on the 128/256-bit forms).
		     Two shift-count styles appear: count taken from xmm/m128 (opcodes F1/F2/F3 with /r) and
		     count taken from imm8 (opcodes 71/72/73 with ModRM.reg = /6).
		     The openc attribute of each <opc> names one of the <oprndenc> tables at the end of this group.
		     NOTE(review): unlike the VEX and EVEX sections, the legacy section lists no
		     count-from-xmm/m128 entries for PSLLD or PSLLQ; confirm against the source document. -->
		<brief>PSLLW/PSLLD/PSLLQ--Bit Shift Left.</brief>
		<!-- Legacy SSE2 forms. -->
		<ins x32m="V" x64m="V">
			<mnem>PSLLW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F F1 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift words in xmm1 left by amount specified in xmm2/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSLLW</mnem>
			<args>xmm1,imm8</args>
			<opc openc="MI">66 0F 71 /6 ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift words in xmm1 left by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSLLD</mnem>
			<args>xmm1,imm8</args>
			<opc openc="MI">66 0F 72 /6 ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm1 left by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSLLQ</mnem>
			<args>xmm1,imm8</args>
			<opc openc="MI">66 0F 73 /6 ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm1 left by imm8 while shifting in 0s.</dscrp>
		</ins>
		<!-- VEX.128 forms (AVX). -->
		<ins x32m="V" x64m="V">
			<mnem>VPSLLW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG F1 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 left by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLW</mnem>
			<args>xmm1,xmm2,imm8</args>
			<opc openc="VMI">VEX.NDD.128.66.0F.WIG 71 /6 ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 left by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG F2 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLD</mnem>
			<args>xmm1,xmm2,imm8</args>
			<opc openc="VMI">VEX.NDD.128.66.0F.WIG 72 /6 ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 left by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG F3 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLQ</mnem>
			<args>xmm1,xmm2,imm8</args>
			<opc openc="VMI">VEX.NDD.128.66.0F.WIG 73 /6 ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 left by imm8 while shifting in 0s.</dscrp>
		</ins>
		<!-- VEX.256 forms (AVX2). The shift count operand stays xmm3/m128. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSLLW</mnem>
			<args>ymm1,ymm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG F1 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 left by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLW</mnem>
			<args>ymm1,ymm2,imm8</args>
			<opc openc="VMI">VEX.NDD.256.66.0F.WIG 71 /6 ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 left by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLD</mnem>
			<args>ymm1,ymm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG F2 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLD</mnem>
			<args>ymm1,ymm2,imm8</args>
			<opc openc="VMI">VEX.NDD.256.66.0F.WIG 72 /6 ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 left by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLQ</mnem>
			<args>ymm1,ymm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG F3 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLQ</mnem>
			<args>ymm1,ymm2,imm8</args>
			<opc openc="VMI">VEX.NDD.256.66.0F.WIG 73 /6 ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 left by imm8 while shifting in 0s.</dscrp>
		</ins>
		<!-- EVEX word forms (AVX512BW), writemask k1 with optional zeroing. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSLLW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.128.66.0F.WIG F1 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLW</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.256.66.0F.WIG F1 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLW</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.512.66.0F.WIG F1 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in zmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLW</mnem>
			<args>xmm1 {k1}{z},xmm2/m128,imm8</args>
			<opc openc="FVM">EVEX.NDD.128.66.0F.WIG 71 /6 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in xmm2/m128 left by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLW</mnem>
			<args>ymm1 {k1}{z},ymm2/m256,imm8</args>
			<opc openc="FVM">EVEX.NDD.256.66.0F.WIG 71 /6 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in ymm2/m256 left by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLW</mnem>
			<args>zmm1 {k1}{z},zmm2/m512,imm8</args>
			<opc openc="FVM">EVEX.NDD.512.66.0F.WIG 71 /6 ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in zmm2/m512 left by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<!-- EVEX doubleword forms (AVX512F, W0); imm8 variants also accept m32bcst. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSLLD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.128.66.0F.W0 F2 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLD</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.256.66.0F.W0 F2 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLD</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.512.66.0F.W0 F2 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in zmm2 left by amount specified in xmm3/m128 while shifting in 0s under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.128.66.0F.W0 72 /6 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2/m128/m32bcst left by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.256.66.0F.W0 72 /6 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2/m256/m32bcst left by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.512.66.0F.W0 72 /6 ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in zmm2/m512/m32bcst left by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<!-- EVEX quadword forms (AVX512F, W1); imm8 variants also accept m64bcst. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSLLQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.128.66.0F.W1 F3 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLQ</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.256.66.0F.W1 F3 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLQ</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.512.66.0F.W1 F3 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in zmm2 left by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.128.66.0F.W1 73 /6 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2/m128/m64bcst left by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.256.66.0F.W1 73 /6 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2/m256/m64bcst left by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.512.66.0F.W1 73 /6 ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in zmm2/m512/m64bcst left by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<!-- Operand-encoding tables, keyed by openc. -->
		<oprndenc openc="MI">
			<oprnd1>ModRM:r/m(r,w)</oprnd1>
			<oprnd2>Imm8</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="VMI">
			<oprnd1>VEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>EVEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVI">
			<oprnd1>EVEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- M128 is referenced only by EVEX-encoded entries in this group, so the second
		     operand is carried in EVEX.vvvv. -->
		<oprndenc openc="M128">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PSRAW/PSRAD/PSRAQ--Bit Shift Arithmetic Right.</brief>
		<ins x32m="V" x64m="V">
			<!-- Legacy SSE2 form; shift count is taken from xmm2/m128. -->
			<mnem>PSRAW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F E1 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift words in xmm1 right by amount specified in xmm2/m128 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSRAW</mnem>
			<args>xmm1,imm8</args>
			<opc openc="MI">66 0F 71 /4 ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift words in xmm1 right by imm8 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSRAD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F E2 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm1 right by amount specified in xmm2/m128 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSRAD</mnem>
			<args>xmm1,imm8</args>
			<opc openc="MI">66 0F 72 /4 ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm1 right by imm8 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG E1 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAW</mnem>
			<args>xmm1,xmm2,imm8</args>
			<opc openc="VMI">VEX.NDD.128.66.0F.WIG 71 /4 ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 right by imm8 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG E2 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAD</mnem>
			<args>xmm1,xmm2,imm8</args>
			<opc openc="VMI">VEX.NDD.128.66.0F.WIG 72 /4 ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 right by imm8 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<!-- VEX.256 form; the shift count is a 128-bit operand (xmm3/m128), matching the
			     other 256-bit and 512-bit count-from-register entries in this group. -->
			<mnem>VPSRAW</mnem>
			<args>ymm1,ymm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG E1 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAW</mnem>
			<args>ymm1,ymm2,imm8</args>
			<opc openc="VMI">VEX.NDD.256.66.0F.WIG 71 /4 ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 right by imm8 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<!-- VEX.256 form; the description names the same count operand as <args> (xmm3/m128). -->
			<mnem>VPSRAD</mnem>
			<args>ymm1,ymm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG E2 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAD</mnem>
			<args>ymm1,ymm2,imm8</args>
			<opc openc="VMI">VEX.NDD.256.66.0F.WIG 72 /4 ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 right by imm8 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.128.66.0F.WIG E1 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAW</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.256.66.0F.WIG E1 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAW</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.512.66.0F.WIG E1 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in zmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAW</mnem>
			<args>xmm1 {k1}{z},xmm2/m128,imm8</args>
			<opc openc="FVM">EVEX.NDD.128.66.0F.WIG 71 /4 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in xmm2/m128 right by imm8 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAW</mnem>
			<args>ymm1 {k1}{z},ymm2/m256,imm8</args>
			<opc openc="FVM">EVEX.NDD.256.66.0F.WIG 71 /4 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in ymm2/m256 right by imm8 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAW</mnem>
			<args>zmm1 {k1}{z},zmm2/m512,imm8</args>
			<opc openc="FVM">EVEX.NDD.512.66.0F.WIG 71 /4 ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in zmm2/m512 right by imm8 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.128.66.0F.W0 E2 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAD</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.256.66.0F.W0 E2 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAD</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.512.66.0F.W0 E2 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in zmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.128.66.0F.W0 72 /4 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2/m128/m32bcst right by imm8 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.256.66.0F.W0 72 /4 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2/m256/m32bcst right by imm8 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.512.66.0F.W0 72 /4 ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in zmm2/m512/m32bcst right by imm8 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.128.66.0F.W1 E2 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAQ</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.256.66.0F.W1 E2 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAQ</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.512.66.0F.W1 E2 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in zmm2 right by amount specified in xmm3/m128 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.128.66.0F.W1 72 /4 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2/m128/m64bcst right by imm8 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.256.66.0F.W1 72 /4 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2/m256/m64bcst right by imm8 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.512.66.0F.W1 72 /4 ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in zmm2/m512/m64bcst right by imm8 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="MI">
			<oprnd1>ModRM:r/m(r,w)</oprnd1>
			<oprnd2>Imm8</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="VMI">
			<oprnd1>VEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>EVEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVI">
			<oprnd1>EVEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="M128">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PSRLDQ--Byte Shift Right.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSRLDQ</mnem>
			<args>xmm1,imm8</args>
			<opc openc="MI">66 0F 73 /3 ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift xmm1 right by imm8 bytes while shifting in 0s and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLDQ</mnem>
			<args>xmm1,xmm2,imm8</args>
			<opc openc="VMI">VEX.NDD.128.66.0F 73 /3 ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift xmm2 right by imm8 bytes while shifting in 0s and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLDQ</mnem>
			<args>ymm1,ymm2,imm8</args>
			<opc openc="VMI">VEX.NDD.256.66.0F 73 /3 ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift ymm2 right by imm8 bytes while shifting in 0s and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLDQ</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="FVM">EVEX.NDD.128.66.0F.WIG 73 /3 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift xmm2/m128 right by imm8 bytes while shifting in 0s and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLDQ</mnem>
			<args>ymm1,ymm2/m256,imm8</args>
			<opc openc="FVM">EVEX.NDD.256.66.0F.WIG 73 /3 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift ymm2/m256 right by imm8 bytes while shifting in 0s and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLDQ</mnem>
			<args>zmm1,zmm2/m512,imm8</args>
			<opc openc="FVM">EVEX.NDD.512.66.0F.WIG 73 /3 ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift zmm2/m512 right by imm8 bytes while shifting in 0s and store result in zmm1.</dscrp>
		</ins>
		<oprndenc openc="MI">
			<oprnd1>ModRM:r/m(r,w)</oprnd1>
			<oprnd2>Imm8</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="VMI">
			<oprnd1>VEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>EVEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PSRLW/PSRLD/PSRLQ--Shift Packed Data Right Logical.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSRLW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F D1 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift words in xmm1 right by amount specified in xmm2/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSRLW</mnem>
			<args>xmm1,imm8</args>
			<opc openc="MI">66 0F 71 /2 ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift words in xmm1 right by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSRLD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F D2 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm1 right by amount specified in xmm2/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSRLD</mnem>
			<args>xmm1,imm8</args>
			<opc openc="MI">66 0F 72 /2 ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm1 right by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSRLQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F D3 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm1 right by amount specified in xmm2/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSRLQ</mnem>
			<args>xmm1,imm8</args>
			<opc openc="MI">66 0F 73 /2 ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm1 right by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG D1 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLW</mnem>
			<args>xmm1,xmm2,imm8</args>
			<opc openc="VMI">VEX.NDD.128.66.0F.WIG 71 /2 ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 right by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG D2 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLD</mnem>
			<args>xmm1,xmm2,imm8</args>
			<opc openc="VMI">VEX.NDD.128.66.0F.WIG 72 /2 ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 right by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG D3 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLQ</mnem>
			<args>xmm1,xmm2,imm8</args>
			<opc openc="VMI">VEX.NDD.128.66.0F.WIG 73 /2 ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 right by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLW</mnem>
			<args>ymm1,ymm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG D1 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLW</mnem>
			<args>ymm1,ymm2,imm8</args>
			<opc openc="VMI">VEX.NDD.256.66.0F.WIG 71 /2 ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 right by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLD</mnem>
			<args>ymm1,ymm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG D2 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLD</mnem>
			<args>ymm1,ymm2,imm8</args>
			<opc openc="VMI">VEX.NDD.256.66.0F.WIG 72 /2 ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 right by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLQ</mnem>
			<args>ymm1,ymm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG D3 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLQ</mnem>
			<args>ymm1,ymm2,imm8</args>
			<opc openc="VMI">VEX.NDD.256.66.0F.WIG 73 /2 ib</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 right by imm8 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.128.66.0F.WIG D1 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLW</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.256.66.0F.WIG D1 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLW</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.512.66.0F.WIG D1 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in zmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLW</mnem>
			<args>xmm1 {k1}{z},xmm2/m128,imm8</args>
			<opc openc="FVM">EVEX.NDD.128.66.0F.WIG 71 /2 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in xmm2/m128 right by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLW</mnem>
			<args>ymm1 {k1}{z},ymm2/m256,imm8</args>
			<opc openc="FVM">EVEX.NDD.256.66.0F.WIG 71 /2 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in ymm2/m256 right by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLW</mnem>
			<args>zmm1 {k1}{z},zmm2/m512,imm8</args>
			<opc openc="FVM">EVEX.NDD.512.66.0F.WIG 71 /2 ib</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in zmm2/m512 right by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.128.66.0F.W0 D2 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLD</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.256.66.0F.W0 D2 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLD</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.512.66.0F.W0 D2 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in zmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst,imm8</args>
			<!-- openc must name an <oprndenc> table of this <common>; the EVEX imm8 forms use FVI (destination in EVEX.vvvv, source in ModRM:r/m, then Imm8). -->
			<opc openc="FVI">EVEX.NDD.128.66.0F.W0 72 /2 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2/m128/m32bcst right by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst,imm8</args>
			<!-- openc must name an <oprndenc> table of this <common>; the EVEX imm8 forms use FVI (destination in EVEX.vvvv, source in ModRM:r/m, then Imm8). -->
			<opc openc="FVI">EVEX.NDD.256.66.0F.W0 72 /2 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2/m256/m32bcst right by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.512.66.0F.W0 72 /2 ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in zmm2/m512/m32bcst right by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.128.66.0F.W1 D3 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLQ</mnem>
			<args>ymm1 {k1}{z},ymm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.256.66.0F.W1 D3 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLQ</mnem>
			<args>zmm1 {k1}{z},zmm2,xmm3/m128</args>
			<opc openc="M128">EVEX.NDS.512.66.0F.W1 D3 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in zmm2 right by amount specified in xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLQ</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst,imm8</args>
			<!-- openc must name an <oprndenc> table of this <common>; the EVEX imm8 forms use FVI (destination in EVEX.vvvv, source in ModRM:r/m, then Imm8). -->
			<opc openc="FVI">EVEX.NDD.128.66.0F.W1 73 /2 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2/m128/m64bcst right by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLQ</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<!-- openc must name an <oprndenc> table of this <common>; the EVEX imm8 forms use FVI (destination in EVEX.vvvv, source in ModRM:r/m, then Imm8). -->
			<opc openc="FVI">EVEX.NDD.256.66.0F.W1 73 /2 ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2/m256/m64bcst right by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLQ</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst,imm8</args>
			<opc openc="FVI">EVEX.NDD.512.66.0F.W1 73 /2 ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in zmm2/m512/m64bcst right by imm8 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="MI">
			<oprnd1>ModRM:r/m(r,w)</oprnd1>
			<oprnd2>Imm8</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="VMI">
			<oprnd1>VEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>EVEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVI">
			<oprnd1>EVEX.vvvv(w)</oprnd1>
			<oprnd2>ModRM:r/m(R)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<!-- M128 is referenced only by the EVEX-encoded register/m128 shift-count forms of this group, so the first source operand is carried in EVEX.vvvv. -->
		<oprndenc openc="M128">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VPSLLVW/VPSLLVD/VPSLLVQ--Variable Bit Shift Left Logical.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 47 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 47 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 47 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 47 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.W1 12 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 left by amount specified in the corresponding element of xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.W1 12 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 left by amount specified in the corresponding element of ymm3/m256 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.W1 12 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in zmm2 left by amount specified in the corresponding element of zmm3/m512 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 47 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 left by amount specified in the corresponding element of xmm3/m128/m32bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 47 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 left by amount specified in the corresponding element of ymm3/m256/m32bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 47 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in zmm2 left by amount specified in the corresponding element of zmm3/m512/m32bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 47 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 left by amount specified in the corresponding element of xmm3/m128/m64bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 47 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 left by amount specified in the corresponding element of ymm3/m256/m64bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSLLVQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 47 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in zmm2 left by amount specified in the corresponding element of zmm3/m512/m64bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VPSRLVW/VPSRLVD/VPSRLVQ--Variable Bit Shift Right Logical.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 45 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W1 45 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 45 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W1 45 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.W1 10 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.W1 10 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.W1 10 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in zmm2 right by amount specified in the corresponding element of zmm3/m512 while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 45 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m32bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 45 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m32bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 45 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m32bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 45 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m64bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 45 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m64bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRLVQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 45 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m64bcst while shifting in 0s using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PSUBB/PSUBW/PSUBD/PSUBQ--Packed Integer Subtract.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSUBB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F F8 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Subtract packed byte integers in xmm2/m128 from xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSUBW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F F9 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Subtract packed word integers in xmm2/m128 from xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSUBD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F FA /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Subtract packed doubleword integers in xmm2/m128 from xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSUBQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<!-- Opcode byte and the /r ModRM marker are separate space-delimited tokens. -->
			<opc openc="RM">66 0F FB /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Subtract packed quadword integers in xmm2/m128 from xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG F8 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed byte integers in xmm3/m128 from xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG F9 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed word integers in xmm3/m128 from xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG FA /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed doubleword integers in xmm3/m128 from xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<!-- Opcode byte and the /r ModRM marker are separate space-delimited tokens. -->
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG FB /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed quadword integers in xmm3/m128 from xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG F8 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Subtract packed byte integers in ymm3/m256 from ymm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG F9 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Subtract packed word integers in ymm3/m256 from ymm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG FA /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Subtract packed doubleword integers in ymm3/m256 from ymm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<!-- Opcode byte and the /r ModRM marker are separate space-delimited tokens. -->
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG FB /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Subtract packed quadword integers in ymm3/m256 from ymm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG F8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed byte integers in xmm3/m128 from xmm2 and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG F8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed byte integers in ymm3/m256 from ymm2 and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG F8 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed byte integers in zmm3/m512 from zmm2 and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG F9 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed word integers in xmm3/m128 from xmm2 and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG F9 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed word integers in ymm3/m256 from ymm2 and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG F9 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed word integers in zmm3/m512 from zmm2 and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 FA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed doubleword integers in xmm3/m128/m32bcst from xmm2 and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 FA /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed doubleword integers in ymm3/m256/m32bcst from ymm2 and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 FA /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed doubleword integers in zmm3/m512/m32bcst from zmm2 and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 FB /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed quadword integers in xmm3/m128/m64bcst from xmm2 and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 FB /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed quadword integers in ymm3/m256/m64bcst from ymm2 and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<!-- VPSUBQ, EVEX-encoded 512-bit form with write mask and 64-bit broadcast (CPUID flag: AVX512F). -->
		<ins x32m="V" x64m="V">
			<mnem>VPSUBQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 FB /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed quadword integers in zmm3/m512/m64bcst from zmm2 and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PSUBSB/PSUBSW--Subtract Packed Signed Integers with Signed Saturation.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSUBSB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F E8 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1 and saturate results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSUBSW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F E9 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1 and saturate results.</dscrp>
		</ins>
		<!-- VPSUBSB, VEX-encoded 128-bit form (CPUID flag: AVX); VEX.W is ignored (WIG), as in the EVEX forms of this group. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSUBSB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG E8 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed signed byte integers in xmm3/m128 from packed signed byte integers in xmm2 and saturate results.</dscrp>
		</ins>
		<!-- VPSUBSW, VEX-encoded 128-bit form (CPUID flag: AVX); VEX.W is ignored (WIG), as in the EVEX forms of this group. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSUBSW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG E9 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed signed word integers in xmm3/m128 from packed signed word integers in xmm2 and saturate results.</dscrp>
		</ins>
		<!-- VPSUBSB, VEX-encoded 256-bit form (CPUID flag: AVX2); VEX.W is ignored (WIG), as in the EVEX forms of this group. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSUBSB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG E8 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Subtract packed signed byte integers in ymm3/m256 from packed signed byte integers in ymm2 and saturate results.</dscrp>
		</ins>
		<!-- VPSUBSW, VEX-encoded 256-bit form (CPUID flag: AVX2); VEX.W is ignored (WIG), as in the EVEX forms of this group. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSUBSW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG E9 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Subtract packed signed word integers in ymm3/m256 from packed signed word integers in ymm2 and saturate results.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBSB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG E8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed signed byte integers in xmm3/m128 from packed signed byte integers in xmm2 and saturate results and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBSB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG E8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed signed byte integers in ymm3/m256 from packed signed byte integers in ymm2 and saturate results and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBSB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG E8 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed signed byte integers in zmm3/m512 from packed signed byte integers in zmm2 and saturate results and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBSW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG E9 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed signed word integers in xmm3/m128 from packed signed word integers in xmm2 and saturate results and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBSW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG E9 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed signed word integers in ymm3/m256 from packed signed word integers in ymm2 and saturate results and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBSW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG E9 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed signed word integers in zmm3/m512 from packed signed word integers in zmm2 and saturate results and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PSUBUSB/PSUBUSW--Subtract Packed Unsigned Integers with Unsigned Saturation.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PSUBUSB</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F D8 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1 and saturate result.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PSUBUSW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F D9 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1 and saturate result.</dscrp>
		</ins>
		<!-- VPSUBUSB, VEX-encoded 128-bit form (CPUID flag: AVX); VEX.W is ignored (WIG), as in the EVEX forms of this group. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSUBUSB</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG D8 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned byte integers in xmm3/m128 from packed unsigned byte integers in xmm2 and saturate result.</dscrp>
		</ins>
		<!-- VPSUBUSW, VEX-encoded 128-bit form (CPUID flag: AVX); VEX.W is ignored (WIG), as in the EVEX forms of this group. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSUBUSW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG D9 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned word integers in xmm3/m128 from packed unsigned word integers in xmm2 and saturate result.</dscrp>
		</ins>
		<!-- VPSUBUSB, VEX-encoded 256-bit form (CPUID flag: AVX2); VEX.W is ignored (WIG), as in the EVEX forms of this group. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSUBUSB</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG D8 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned byte integers in ymm3/m256 from packed unsigned byte integers in ymm2 and saturate result.</dscrp>
		</ins>
		<!-- VPSUBUSW, VEX-encoded 256-bit form (CPUID flag: AVX2); VEX.W is ignored (WIG), as in the EVEX forms of this group. -->
		<ins x32m="V" x64m="V">
			<mnem>VPSUBUSW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG D9 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned word integers in ymm3/m256 from packed unsigned word integers in ymm2 and saturate result.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBUSB</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG D8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned byte integers in xmm3/m128 from packed unsigned byte integers in xmm2, saturate results and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBUSB</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG D8 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned byte integers in ymm3/m256 from packed unsigned byte integers in ymm2, saturate results and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBUSB</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG D8 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned byte integers in zmm3/m512 from packed unsigned byte integers in zmm2, saturate results and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBUSW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG D9 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned word integers in xmm3/m128 from packed unsigned word integers in xmm2 and saturate results and store in xmm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBUSW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG D9 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned word integers in ymm3/m256 from packed unsigned word integers in ymm2, saturate results and store in ymm1 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSUBUSW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG D9 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Subtract packed unsigned word integers in zmm3/m512 from packed unsigned word integers in zmm2, saturate results and store in zmm1 using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>VPTESTNMB/W/D/Q--Logical NAND and Set.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMB</mnem>
			<args>k2 {k1},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.F3.0F38.W0 26 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed byte integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMB</mnem>
			<args>k2 {k1},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.F3.0F38.W0 26 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed byte integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMB</mnem>
			<args>k2 {k1},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.F3.0F38.W0 26 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed byte integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMW</mnem>
			<args>k2 {k1},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.F3.0F38.W1 26 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed word integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMW</mnem>
			<args>k2 {k1},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.F3.0F38.W1 26 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed word integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMW</mnem>
			<args>k2 {k1},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.F3.0F38.W1 26 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed word integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMD</mnem>
			<args>k2 {k1},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.F3.0F38.W0 27 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMD</mnem>
			<args>k2 {k1},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.F3.0F38.W0 27 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMD</mnem>
			<args>k2 {k1},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.F3.0F38.W0 27 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMQ</mnem>
			<args>k2 {k1},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.F3.0F38.W1 27 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed quadword integers in xmm2 and xmm3/m128/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMQ</mnem>
			<args>k2 {k1},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.F3.0F38.W1 27 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed quadword integers in ymm2 and ymm3/m256/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTNMQ</mnem>
			<args>k2 {k1},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.F3.0F38.W1 27 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise NAND of packed quadword integers in zmm2 and zmm3/m512/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PUNPCKHBW/PUNPCKHWD/PUNPCKHDQ/PUNPCKHQDQ--Unpack High Data.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PUNPCKHBW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 68 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Interleave high-order bytes from xmm1 and xmm2/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PUNPCKHWD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 69 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Interleave high-order words from xmm1 and xmm2/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PUNPCKHDQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 6A /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PUNPCKHQDQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 6D /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Interleave high-order quadword from xmm1 and xmm2/m128 into xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHBW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 68 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Interleave high-order bytes from xmm2 and xmm3/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHWD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 69 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Interleave high-order words from xmm2 and xmm3/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHDQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 6A /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Interleave high-order doublewords from xmm2 and xmm3/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHQDQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 6D /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Interleave high-order quadword from xmm2 and xmm3/m128 into xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHBW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 68 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Interleave high-order bytes from ymm2 and ymm3/m256 into ymm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHWD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 69 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Interleave high-order words from ymm2 and ymm3/m256 into ymm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHDQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 6A /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Interleave high-order doublewords from ymm2 and ymm3/m256 into ymm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHQDQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 6D /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Interleave high-order quadword from ymm2 and ymm3/m256 into ymm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHBW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG 68 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave high-order bytes from xmm2 and xmm3/m128 into xmm1 register using k1 write mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHWD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG 69 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave high-order words from xmm2 and xmm3/m128 into xmm1 register using k1 write mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHDQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 6A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave high-order doublewords from xmm2 and xmm3/m128/m32bcst into xmm1 register using k1 write mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHQDQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 6D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave high-order quadword from xmm2 and xmm3/m128/m64bcst into xmm1 register using k1 write mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHBW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG 68 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave high-order bytes from ymm2 and ymm3/m256 into ymm1 register using k1 write mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHWD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG 69 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave high-order words from ymm2 and ymm3/m256 into ymm1 register using k1 write mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHDQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 6A /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave high-order doublewords from ymm2 and ymm3/m256/m32bcst into ymm1 register using k1 write mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHQDQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 6D /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave high-order quadword from ymm2 and ymm3/m256/m64bcst into ymm1 register using k1 write mask.</dscrp>
		</ins>
		<!-- VPUNPCKHBW, EVEX-encoded 512-bit form with write mask (CPUID flag: AVX512BW). -->
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHBW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG 68 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave high-order bytes from zmm2 and zmm3/m512 into zmm1 register using k1 write mask.</dscrp>
		</ins>
		<!-- VPUNPCKHWD, EVEX-encoded 512-bit form with write mask (CPUID flag: AVX512BW). -->
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHWD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG 69 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave high-order words from zmm2 and zmm3/m512 into zmm1 register using k1 write mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHDQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 6A /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave high-order doublewords from zmm2 and zmm3/m512/m32bcst into zmm1 register using k1 write mask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKHQDQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 6D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave high-order quadword from zmm2 and zmm3/m512/m64bcst into zmm1 register using k1 write mask.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<brief>PUNPCKLBW/PUNPCKLWD/PUNPCKLDQ/PUNPCKLQDQ--Unpack Low Data.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PUNPCKLBW</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 60 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Interleave low-order bytes from xmm1 and xmm2/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PUNPCKLWD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 61 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Interleave low-order words from xmm1 and xmm2/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PUNPCKLDQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 62 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>PUNPCKLQDQ</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 6C /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Interleave low-order quadword from xmm1 and xmm2/m128 into xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLBW</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 60 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Interleave low-order bytes from xmm2 and xmm3/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLWD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 61 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Interleave low-order words from xmm2 and xmm3/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLDQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 62 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Interleave low-order doublewords from xmm2 and xmm3/m128 into xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLQDQ</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 6C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Interleave low-order quadword from xmm2 and xmm3/m128 into xmm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLBW</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 60 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Interleave low-order bytes from ymm2 and ymm3/m256 into ymm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLWD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 61 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Interleave low-order words from ymm2 and ymm3/m256 into ymm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLDQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 62 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Interleave low-order doublewords from ymm2 and ymm3/m256 into ymm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLQDQ</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 6C /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Interleave low-order quadword from ymm2 and ymm3/m256 into ymm1 register.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLBW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG 60 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave low-order bytes from xmm2 and xmm3/m128 into xmm1 register subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLWD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F.WIG 61 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave low-order words from xmm2 and xmm3/m128 into xmm1 register subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLDQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 62 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave low-order doublewords from xmm2 and xmm3/m128/m32bcst into xmm1 register subject to write mask k1.</dscrp>
		</ins>
		<!-- VPUNPCKLQDQ, EVEX-encoded 128-bit form with write mask and 64-bit broadcast (CPUID flags: AVX512VL, AVX512F). -->
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLQDQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 6C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave low-order quadword from xmm2 and xmm3/m128/m64bcst into xmm1 register subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLBW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG 60 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave low-order bytes from ymm2 and ymm3/m256 into ymm1 register subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLWD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F.WIG 61 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave low-order words from ymm2 and ymm3/m256 into ymm1 register subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLDQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 62 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave low-order doublewords from ymm2 and ymm3/m256/m32bcst into ymm1 register subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLQDQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 6C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave low-order quadword from ymm2 and ymm3/m256/m64bcst into ymm1 register subject to write mask k1.</dscrp>
		</ins>
		<!-- EVEX.512 form of VPUNPCKLBW (AVX512BW). The opcode byte and the /r marker are separate, space-delimited tokens, as in the 128/256-bit forms. -->
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLBW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG 60 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave low-order bytes from zmm2 and zmm3/m512 into zmm1 register subject to write mask k1.</dscrp>
		</ins>
		<!-- EVEX.512 form of VPUNPCKLWD (AVX512BW). The opcode byte and the /r marker are separate, space-delimited tokens, as in the 128/256-bit forms. -->
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLWD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F.WIG 61 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Interleave low-order words from zmm2 and zmm3/m512 into zmm1 register subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLDQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 62 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave low-order doublewords from zmm2 and zmm3/m512/m32bcst into zmm1 register subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPUNPCKLQDQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 6C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Interleave low-order quadword from zmm2 and zmm3/m512/m64bcst into zmm1 register subject to write mask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  128-bit-granularity shuffles: VSHUFF32X4, VSHUFF64X2, VSHUFI32X4, VSHUFI64X2.
	  Every form is EVEX-encoded with openc FV and ends in "/r ib", i.e. takes an imm8
	  selector as its fourth operand, so the FV operand table lists Imm8 as oprnd4.
	  Mnemonics use an upper-case X in both the 256-bit and 512-bit forms.
	 -->
	<common>
		<brief>SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2--Shuffle Packed Values at 128-bit Granularity.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFF32X4</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W0 23 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle 128-bit packed single-precision floating-point values selected by imm8 from ymm2 and ymm3/m256/m32bcst and place results in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFF32X4</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W0 23 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle 128-bit packed single-precision floating-point values selected by imm8 from zmm2 and zmm3/m512/m32bcst and place results in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFF64X2</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W1 23 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle 128-bit packed double-precision floating-point values selected by imm8 from ymm2 and ymm3/m256/m64bcst and place results in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFF64X2</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W1 23 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle 128-bit packed double-precision floating-point values selected by imm8 from zmm2 and zmm3/m512/m64bcst and place results in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFI32X4</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W0 43 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle 128-bit packed double-word values selected by imm8 from ymm2 and ymm3/m256/m32bcst and place results in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFI32X4</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W0 43 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle 128-bit packed double-word values selected by imm8 from zmm2 and zmm3/m512/m32bcst and place results in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFI64X2</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W1 43 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle 128-bit packed quad-word values selected by imm8 from ymm2 and ymm3/m256/m64bcst and place results in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFI64X2</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W1 43 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle 128-bit packed quad-word values selected by imm8 from zmm2 and zmm3/m512/m64bcst and place results in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
	  SHUFPD family: legacy SSE2 form (openc RMI), VEX-encoded AVX forms (openc RVMI)
	  and EVEX-encoded AVX512 forms (openc FV). Every form takes a trailing imm8.
	  Each description names the same register width as the corresponding args.
	 -->
	<common>
		<brief>SHUFPD--Packed Interleave Shuffle of Pairs of Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SHUFPD</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">66 0F C6 /r ib</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Shuffle two pairs of double-precision floating-point values from xmm1 and xmm2/m128 using imm8 to select from each pair, interleaved result is stored in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFPD</mnem>
			<args>xmm1,xmm2,xmm3/m128,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.66.0F.WIG C6 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shuffle two pairs of double-precision floating-point values from xmm2 and xmm3/m128 using imm8 to select from each pair, interleaved result is stored in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFPD</mnem>
			<args>ymm1,ymm2,ymm3/m256,imm8</args>
			<opc openc="RVMI">VEX.NDS.256.66.0F.WIG C6 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Shuffle four pairs of double-precision floating-point values from ymm2 and ymm3/m256 using imm8 to select from each pair, interleaved result is stored in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFPD</mnem>
			<args>xmm1{k1}{z},xmm2,xmm3/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 C6 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle two pairs of double-precision floating-point values from xmm2 and xmm3/m128/m64bcst using imm8 to select from each pair. Store interleaved results in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFPD</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 C6 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle four pairs of double-precision floating-point values from ymm2 and ymm3/m256/m64bcst using imm8 to select from each pair. Store interleaved results in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFPD</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 C6 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shuffle eight pairs of double-precision floating-point values from zmm2 and zmm3/m512/m64bcst using imm8 to select from each pair. Store interleaved results in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
	  SHUFPS family: legacy SSE form (openc RMI), VEX-encoded AVX forms (openc RVMI)
	  and EVEX-encoded AVX512 forms (openc FV). Every form takes a trailing imm8.
	  The two-operand SSE form reads xmm1 and xmm2/m128; the three-operand VEX/EVEX
	  forms read the second and third operands and write the first, and the
	  descriptions name the registers accordingly.
	 -->
	<common>
		<brief>SHUFPS--Packed Interleave Shuffle of Quadruplets of Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SHUFPS</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">0F C6 /r ib</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Select from quadruplet of single-precision floating-point values in xmm1 and xmm2/m128 using imm8, interleaved result pairs are stored in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFPS</mnem>
			<args>xmm1,xmm2,xmm3/m128,imm8</args>
			<opc openc="RVMI">VEX.NDS.128.0F.WIG C6 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Select from quadruplet of single-precision floating-point values in xmm2 and xmm3/m128 using imm8, interleaved result pairs are stored in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFPS</mnem>
			<args>ymm1,ymm2,ymm3/m256,imm8</args>
			<opc openc="RVMI">VEX.NDS.256.0F.WIG C6 /r ib</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Select from quadruplet of single-precision floating-point values in ymm2 and ymm3/m256 using imm8, interleaved result pairs are stored in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFPS</mnem>
			<args>xmm1{k1}{z},xmm2,xmm3/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 C6 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Select from quadruplet of single-precision floating-point values in xmm2 and xmm3/m128 using imm8, interleaved result pairs are stored in xmm1, subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFPS</mnem>
			<args>ymm1{k1}{z},ymm2,ymm3/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 C6 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Select from quadruplet of single-precision floating-point values in ymm2 and ymm3/m256 using imm8, interleaved result pairs are stored in ymm1, subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSHUFPS</mnem>
			<args>zmm1{k1}{z},zmm2,zmm3/m512/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 C6 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Select from quadruplet of single-precision floating-point values in zmm2 and zmm3/m512 using imm8, interleaved result pairs are stored in zmm1, subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVMI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
	  SQRTPD family: legacy SSE2 form and VEX-encoded AVX forms (openc RM), plus
	  EVEX-encoded AVX512 forms (openc FV). The EVEX forms are W1 (double precision),
	  so their memory-broadcast alternative is m64bcst at every vector length.
	 -->
	<common>
		<brief>SQRTPD--Square Root of Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SQRTPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 51 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed double-precision floating-point values in xmm2/m128 and stores the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.66.0F.WIG 51 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed double-precision floating-point values in xmm2/m128 and stores the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTPD</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.66.0F.WIG 51 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed double-precision floating-point values in ymm2/m256 and stores the result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTPD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F.W1 51 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed double-precision floating-point values in xmm2/m128/m64bcst and stores the result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTPD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F.W1 51 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed double-precision floating-point values in ymm2/m256/m64bcst and stores the result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTPD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.512.66.0F.W1 51 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed double-precision floating-point values in zmm2/m512/m64bcst and stores the result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  SQRTPS family: legacy SSE form and VEX-encoded AVX forms (openc RM), plus
	  EVEX-encoded AVX512 forms (openc FV). In every opc string the opcode byte and
	  the /r marker are separate, space-delimited tokens.
	 -->
	<common>
		<brief>SQRTPS--Square Root of Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SQRTPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 51 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed single-precision floating-point values in xmm2/m128 and stores the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">VEX.128.0F.WIG 51 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed single-precision floating-point values in xmm2/m128 and stores the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTPS</mnem>
			<args>ymm1,ymm2/m256</args>
			<opc openc="RM">VEX.256.0F.WIG 51 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed single-precision floating-point values in ymm2/m256 and stores the result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTPS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.0F.W0 51 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed single-precision floating-point values in xmm2/m128/m32bcst and stores the result in xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTPS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.0F.W0 51 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed single-precision floating-point values in ymm2/m256/m32bcst and stores the result in ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTPS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.512.0F.W0 51 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes Square Roots of the packed single-precision floating-point values in zmm2/m512/m32bcst and stores the result in zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  SQRTSD family: legacy SSE2 form (openc RM), VEX-encoded AVX form (openc RVM)
	  and EVEX-encoded AVX512F form (openc T1S). In every opc string the opcode byte
	  and the /r marker are separate, space-delimited tokens.
	 -->
	<common>
		<brief>SQRTSD--Compute Square Root of Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SQRTSD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F2 0F 51 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Computes square root of the low double-precision floating-point value in xmm2/m64 and stores the results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTSD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.NDS.128.F2.0F.WIG 51 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Computes square root of the low double-precision floating-point value in xmm3/m64 and stores the results in xmm1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64].</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 51 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes square root of the low double-precision floating-point value in xmm3/m64 and stores the results in xmm1 under writemask k1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64].</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  SQRTSS family: one legacy SSE form (openc RM), one VEX-encoded AVX form
	  (openc RVM) and one EVEX-encoded AVX512F form (openc T1S, args carry {k1}{z}
	  and {er}). The three oprndenc tables below give the per-operand encoding for
	  each openc key referenced by the ins entries.
	 -->
	<common>
		<brief>SQRTSS--Compute Square Root of Scalar Single-Precision Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SQRTSS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">F3 0F 51 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Computes square root of the low single-precision floating-point value in xmm2/m32 and stores the results in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTSS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.NDS.128.F3.0F.WIG 51 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Computes square root of the low single-precision floating-point value in xmm3/m32 and stores the results in xmm1. Also, upper single-precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32].</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSQRTSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 51 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes square root of the low single-precision floating-point value in xmm3/m32 and stores the results in xmm1 under writemask k1. Also, upper single-precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32].</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
	  VPTERNLOGD / VPTERNLOGQ: six EVEX-encoded forms (128/256/512-bit, W0 for the
	  D variant and W1 for the Q variant), all with openc FV and a trailing imm8.
	  The single FV operand table marks operand 1 as ModRM:reg(r,w): per the
	  descriptions, the destination register is also one of the three sources.
	 -->
	<common>
		<brief>VPTERNLOGD/VPTERNLOGQ--Bitwise Ternary Logic.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPTERNLOGD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.DDS.128.66.0F3A.W0 25 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise ternary logic taking xmm1, xmm2 and xmm3/m128/m32bcst as source operands and writing the result to xmm1 under writemask k1 with dword granularity. The immediate value determines the specific binary function being implemented.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTERNLOGD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.DDS.256.66.0F3A.W0 25 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise ternary logic taking ymm1, ymm2 and ymm3/m256/m32bcst as source operands and writing the result to ymm1 under writemask k1 with dword granularity. The immediate value determines the specific binary function being implemented.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTERNLOGD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst,imm8</args>
			<opc openc="FV">EVEX.DDS.512.66.0F3A.W0 25 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise ternary logic taking zmm1, zmm2 and zmm3/m512/m32bcst as source operands and writing the result to zmm1 under writemask k1 with dword granularity. The immediate value determines the specific binary function being implemented.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTERNLOGQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.DDS.128.66.0F3A.W1 25 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise ternary logic taking xmm1, xmm2 and xmm3/m128/m64bcst as source operands and writing the result to xmm1 under writemask k1 with qword granularity. The immediate value determines the specific binary function being implemented.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTERNLOGQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.DDS.256.66.0F3A.W1 25 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise ternary logic taking ymm1, ymm2 and ymm3/m256/m64bcst as source operands and writing the result to ymm1 under writemask k1 with qword granularity. The immediate value determines the specific binary function being implemented.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTERNLOGQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst,imm8</args>
			<opc openc="FV">EVEX.DDS.512.66.0F3A.W1 25 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise ternary logic taking zmm1, zmm2 and zmm3/m512/m64bcst as source operands and writing the result to zmm1 under writemask k1 with qword granularity. The immediate value determines the specific binary function being implemented.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
	  VPTESTMB / VPTESTMW / VPTESTMD / VPTESTMQ: twelve EVEX-encoded forms, three
	  vector lengths (128/256/512) per element size. The byte and word variants use
	  opcode 26 with openc FVM and list AVX512BW; the doubleword and quadword
	  variants use opcode 27 with openc FV, list AVX512F, and accept an
	  m32bcst/m64bcst memory operand. The destination in args is mask register k2.
	 -->
	<common>
		<brief>VPTESTMB/VPTESTMW/VPTESTMD/VPTESTMQ--Logical AND and Set Mask.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMB</mnem>
			<args>k2 {k1},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.W0 26 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed byte integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMB</mnem>
			<args>k2 {k1},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.W0 26 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed byte integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMB</mnem>
			<args>k2 {k1},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.W0 26 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed byte integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMW</mnem>
			<args>k2 {k1},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.W1 26 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed word integers in xmm2 and xmm3/m128 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMW</mnem>
			<args>k2 {k1},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.W1 26 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed word integers in ymm2 and ymm3/m256 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMW</mnem>
			<args>k2 {k1},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.W1 26 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed word integers in zmm2 and zmm3/m512 and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMD</mnem>
			<args>k2 {k1},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 27 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed doubleword integers in xmm2 and xmm3/m128/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMD</mnem>
			<args>k2 {k1},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 27 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed doubleword integers in ymm2 and ymm3/m256/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMD</mnem>
			<args>k2 {k1},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 27 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed doubleword integers in zmm2 and zmm3/m512/m32bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMQ</mnem>
			<args>k2 {k1},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 27 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed quadword integers in xmm2 and xmm3/m128/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMQ</mnem>
			<args>k2 {k1},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 27 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed quadword integers in ymm2 and ymm3/m256/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPTESTMQ</mnem>
			<args>k2 {k1},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 27 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND of packed quadword integers in zmm2 and zmm3/m512/m64bcst and set mask k2 to reflect the zero/non-zero status of each element of the result, under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		Variable arithmetic right shifts (VPSRAVW / VPSRAVD / VPSRAVQ).
		The first two entries are the VEX-encoded AVX2 forms of VPSRAVD (operand encoding RVM).
		The EVEX entries follow: VPSRAVW uses opcode 11 with operand encoding FVM and has no
		broadcast memory form; VPSRAVD (W0) and VPSRAVQ (W1) share opcode 46 with operand
		encoding FV and accept an m32bcst / m64bcst source respectively.
		The 128-bit and 256-bit EVEX forms list AVX512VL in addition to the base feature flag.
	-->
	<common>
		<brief>VPSRAVW/VPSRAVD/VPSRAVQ--Variable Bit Shift Right Arithmetic.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F38.W0 46 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F38.W0 46 /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in sign bits.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVW</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128</args>
			<opc openc="FVM">EVEX.NDS.128.66.0F38.W1 11 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in xmm2 right by amount specified in the corresponding element of xmm3/m128 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVW</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256</args>
			<opc openc="FVM">EVEX.NDS.256.66.0F38.W1 11 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in ymm2 right by amount specified in the corresponding element of ymm3/m256 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVW</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512</args>
			<opc openc="FVM">EVEX.NDS.512.66.0F38.W1 11 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift words in zmm2 right by amount specified in the corresponding element of zmm3/m512 while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 46 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m32bcst while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 46 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m32bcst while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 46 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift doublewords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m32bcst while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 46 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in xmm2 right by amount specified in the corresponding element of xmm3/m128/m64bcst while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 46 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in ymm2 right by amount specified in the corresponding element of ymm3/m256/m64bcst while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPSRAVQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 46 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift quadwords in zmm2 right by amount specified in the corresponding element of zmm3/m512/m64bcst while shifting in sign bits using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		Bitwise XOR family. Entries are ordered legacy SSE2 (PXOR, encoding RM, destination is
		read and written), then VEX (VPXOR, encoding RVM), then EVEX (VPXORD with W0 and
		VPXORQ with W1, encoding FV). All forms use opcode EF in the 0F map.
		NOTE(review): the 128-bit and 256-bit VPXORD/VPXORQ descriptions name only the
		register/full-memory source, while their args also list the m32bcst/m64bcst form;
		the 512-bit descriptions do mention it. Confirm whether the shorter text is intended.
	-->
	<common>
		<brief>PXOR/PXORD/PXORQ--Exclusive Or.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PXOR</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F EF /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Bitwise XOR of xmm2/m128 and xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPXOR</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG EF /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Bitwise XOR of xmm3/m128 and xmm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPXOR</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG EF /r</opc>
			<cpuid>
				<flag>AVX2</flag>
			</cpuid>
			<dscrp>Bitwise XOR of ymm3/m256 and ymm2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPXORD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W0 EF /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise XOR of packed doubleword integers in xmm2 and xmm3/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPXORD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W0 EF /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise XOR of packed doubleword integers in ymm2 and ymm3/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPXORD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W0 EF /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise XOR of packed doubleword integers in zmm2 and zmm3/m512/m32bcst using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPXORQ</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 EF /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise XOR of packed quadword integers in xmm2 and xmm3/m128 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPXORQ</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 EF /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise XOR of packed quadword integers in ymm2 and ymm3/m256 using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VPXORQ</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 EF /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise XOR of packed quadword integers in zmm2 and zmm3/m512/m64bcst using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRANGEPD: EVEX.W1 packed double-precision range operation, opcode 50 in the 0F3A map,
		with a trailing imm8 (the "ib" in the opcode string, oprnd4 in the FV operand encoding).
		The broadcast source for this W1 form is m64bcst, as listed in args; the descriptions
		name the same operand so that text and args agree.
	-->
	<common>
		<brief>VRANGEPD--Range Restriction Calculation For Packed Pairs of Float64 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRANGEPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F3A.W1 50 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Calculate two RANGE operation output value from 2 pairs of double-precision floating-point values in xmm2 and xmm3/m128/m64bcst, store the results to xmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRANGEPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W1 50 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Calculate four RANGE operation output value from 4 pairs of double-precision floating-point values in ymm2 and ymm3/m256/m64bcst, store the results to ymm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRANGEPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{sae},imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W1 50 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Calculate eight RANGE operation output value from 8 pairs of double-precision floating-point values in zmm2 and zmm3/m512/m64bcst, store the results to zmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRANGEPS: EVEX.W0 packed single-precision range operation, opcode 50 in the 0F3A map,
		with a trailing imm8 (oprnd4 in the FV operand encoding). The 128-bit and 256-bit forms
		list AVX512VL together with AVX512DQ; only the 512-bit form carries the {sae} modifier.
	-->
	<common>
		<brief>VRANGEPS--Range Restriction Calculation For Packed Pairs of Float32 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRANGEPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.128.66.0F3A.W0 50 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Calculate four RANGE operation output value from 4 pairs of single-precision floating-point values in xmm2 and xmm3/m128/m32bcst, store the results to xmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRANGEPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.NDS.256.66.0F3A.W0 50 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Calculate eight RANGE operation output value from 8 pairs of single-precision floating-point values in ymm2 and ymm3/m256/m32bcst, store the results to ymm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRANGEPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{sae},imm8</args>
			<opc openc="FV">EVEX.NDS.512.66.0F3A.W0 50 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Calculate 16 RANGE operation output value from 16 pairs of single-precision floating-point values in zmm2 and zmm3/m512/m32bcst, store the results to zmm1 under the writemask k1. Imm8 specifies the comparison and sign of the range operation.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRANGESD: scalar double-precision range operation (EVEX.LIG, W1, opcode 51 in the 0F3A map).
		The instruction takes an imm8 as its fourth operand (see args and oprnd4), so the opcode
		string ends in "ib" to account for the immediate byte that follows ModRM.
	-->
	<common>
		<brief>VRANGESD--Range Restriction Calculation From a pair of Scalar Float64 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRANGESD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F3A.W1 51 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Calculate a RANGE operation output value from 2 double-precision floating-point values in xmm2 and xmm3/m64, store the output to xmm1 under writemask. Imm8 specifies the comparison and sign of the range operation.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRANGESS: scalar single-precision range operation (EVEX.LIG, W0, opcode 51 in the 0F3A map).
		The fourth operand is an imm8 (see args), so the opcode string ends in "ib" and the
		T1S operand encoding records Imm8 as oprnd4.
	-->
	<common>
		<brief>VRANGESS--Range Restriction Calculation From a Pair of Scalar Float32 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRANGESS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F3A.W0 51 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Calculate a RANGE operation output value from 2 single-precision floating-point values in xmm2 and xmm3/m32, store the output to xmm1 under writemask. Imm8 specifies the comparison and sign of the range operation.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRCP14PD: approximate reciprocals of packed double-precision values (EVEX.W1, opcode 4C
		in the 0F38 map). Two-operand FV encoding: destination in ModRM:reg, source in ModRM:r/m
		with an optional m64bcst broadcast. The 128-bit and 256-bit forms also list AVX512VL.
	-->
	<common>
		<brief>VRCP14PD--Compute Approximate Reciprocals of Packed Float64 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRCP14PD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W1 4C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocals of the packed double-precision floating-point values in xmm2/m128/m64bcst and stores the results in xmm1. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRCP14PD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W1 4C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocals of the packed double-precision floating-point values in ymm2/m256/m64bcst and stores the results in ymm1. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRCP14PD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst</args>
			<opc openc="FV">EVEX.512.66.0F38.W1 4C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocals of the packed double-precision floating-point values in zmm2/m512/m64bcst and stores the results in zmm1. Under writemask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRCP14SD: approximate reciprocal of a scalar double-precision value (EVEX.LIG, W1,
		opcode 4D in the 0F38 map). The mnemonic lives in mnem, the operand list in args, and
		the opc element references the T1S operand encoding defined in this record.
	-->
	<common>
		<brief>VRCP14SD--Compute Approximate Reciprocal of Scalar Float64 Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRCP14SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W1 4D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal of the scalar double-precision floating-point value in xmm3/m64 and stores the result in xmm1 using writemask k1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64].</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRCP14PS: approximate reciprocals of packed single-precision values (EVEX.W0, opcode 4C
		in the 0F38 map). Two-operand FV encoding: destination in ModRM:reg, source in ModRM:r/m
		with an optional m32bcst broadcast. The 128-bit and 256-bit forms also list AVX512VL.
	-->
	<common>
		<brief>VRCP14PS--Compute Approximate Reciprocals of Packed Float32 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRCP14PS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W0 4C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocals of the packed single-precision floating-point values in xmm2/m128/m32bcst and stores the results in xmm1. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRCP14PS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W0 4C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocals of the packed single-precision floating-point values in ymm2/m256/m32bcst and stores the results in ymm1. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRCP14PS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst</args>
			<opc openc="FV">EVEX.512.66.0F38.W0 4C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocals of the packed single-precision floating-point values in zmm2/m512/m32bcst and stores the results in zmm1. Under writemask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRCP14SS: approximate reciprocal of a scalar single-precision value (EVEX.LIG, W0,
		opcode 4D in the 0F38 map, T1S operand encoding). The result occupies the low 32 bits
		of xmm1; bits[127:32] come from xmm2, i.e. the upper single-precision elements.
	-->
	<common>
		<brief>VRCP14SS--Compute Approximate Reciprocal of Scalar Float32 Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRCP14SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W0 4D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal of the scalar single-precision floating-point value in xmm3/m32 and stores the results in xmm1 using writemask k1. Also, upper single-precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32].</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VREDUCEPD: reduction transformation on packed double-precision values (EVEX.W1,
		opcode 56 in the 0F3A map) with a trailing imm8 (oprnd3 in the FV operand encoding).
		The broadcast source for this W1 form is m64bcst, as listed in args; the descriptions
		name the same operand so that text and args agree.
	-->
	<common>
		<brief>VREDUCEPD--Perform Reduction Transformation on Packed Float64 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VREDUCEPD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.128.66.0F3A.W1 56 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Perform reduction transformation on packed double-precision floating point values in xmm2/m128/m64bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VREDUCEPD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.256.66.0F3A.W1 56 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Perform reduction transformation on packed double-precision floating point values in ymm2/m256/m64bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VREDUCEPD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{sae},imm8</args>
			<opc openc="FV">EVEX.512.66.0F3A.W1 56 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Perform reduction transformation on double-precision floating point values in zmm2/m512/m64bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VREDUCESD: reduction transformation on a scalar double-precision value (EVEX.LIG, W1,
		opcode 57 in the 0F3A map). The fourth operand is an imm8 (see args), so the opcode
		string ends in "ib" and the T1S operand encoding records Imm8 as oprnd4.
	-->
	<common>
		<brief>VREDUCESD--Perform a Reduction Transformation on a Scalar Float64 Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VREDUCESD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F3A.W1 57 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Perform a reduction transformation on a scalar double-precision floating point value in xmm3/m64 by subtracting a number of fraction bits specified by the imm8 field. Also, upper double precision floating-point value (bits[127:64]) from xmm2 are copied to xmm1[127:64]. Stores the result in xmm1 register.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
		VREDUCEPS: reduction transformation on packed single-precision values (EVEX.W0,
		opcode 56 in the 0F3A map) with a trailing imm8 (oprnd3 in the FV operand encoding).
		The 128-bit and 256-bit forms list AVX512VL together with AVX512DQ; only the 512-bit
		form carries the {sae} modifier.
	-->
	<common>
		<brief>VREDUCEPS--Perform Reduction Transformation on Packed Float32 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VREDUCEPS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.128.66.0F3A.W0 56 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Perform reduction transformation on packed single-precision floating point values in xmm2/m128/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VREDUCEPS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.256.66.0F3A.W0 56 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Perform reduction transformation on packed single-precision floating point values in ymm2/m256/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VREDUCEPS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{sae},imm8</args>
			<opc openc="FV">EVEX.512.66.0F3A.W0 56 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Perform reduction transformation on packed single-precision floating point values in zmm2/m512/m32bcst by subtracting a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VREDUCESS: reduction transformation on a scalar single-precision value (EVEX.LIG, W0,
		opcode 57 in the 0F3A map). The immediate byte is written "ib" (no slash), matching
		the notation used by the other imm8 entries in this file, and the T1S operand encoding
		records Imm8 as oprnd4 to match the four operands listed in args.
	-->
	<common>
		<brief>VREDUCESS--Perform a Reduction Transformation on a Scalar Float32 Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VREDUCESS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F3A.W0 57 /r ib</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Perform a reduction transformation on a scalar single-precision floating point value in xmm3/m32 by subtracting a number of fraction bits specified by the imm8 field. Also, upper single precision floating-point values (bits[127:32]) from xmm2 are copied to xmm1[127:32]. Stores the result in xmm1 register.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRNDSCALEPD: round packed double-precision values to a number of fraction bits given
		by imm8 (EVEX.W1, opcode 09 in the 0F3A map). Three-operand FV encoding with the
		immediate as oprnd3. The 128-bit and 256-bit forms also list AVX512VL; only the
		512-bit form carries the {sae} modifier.
	-->
	<common>
		<brief>VRNDSCALEPD--Round Packed Float64 Values To Include A Given Number Of Fraction Bits.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRNDSCALEPD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst,imm8</args>
			<opc openc="FV">EVEX.128.66.0F3A.W1 09 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rounds packed double-precision floating point values in xmm2/m128/m64bcst to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRNDSCALEPD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst,imm8</args>
			<opc openc="FV">EVEX.256.66.0F3A.W1 09 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rounds packed double-precision floating point values in ymm2/m256/m64bcst to a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRNDSCALEPD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst{sae},imm8</args>
			<opc openc="FV">EVEX.512.66.0F3A.W1 09 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rounds packed double-precision floating-point values in zmm2/m512/m64bcst to a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRNDSCALESD: round a scalar double-precision value to a number of fraction bits given
		by imm8 (EVEX.LIG, W1, opcode 0B in the 0F3A map). Four-operand T1S encoding with the
		immediate recorded as oprnd4.
	-->
	<common>
		<brief>VRNDSCALESD--Round Scalar Float64 Value To Include A Given Number Of Fraction Bits.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRNDSCALESD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F3A.W1 0B /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rounds scalar double-precision floating-point value in xmm3/m64 to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRNDSCALEPS: round packed single-precision values to a number of fraction bits given
		by imm8 (EVEX.W0, opcode 08 in the 0F3A map). Three-operand FV encoding with the
		immediate as oprnd3. The 128-bit and 256-bit forms also list AVX512VL; only the
		512-bit form carries the {sae} modifier.
	-->
	<common>
		<brief>VRNDSCALEPS--Round Packed Float32 Values To Include A Given Number Of Fraction Bits.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRNDSCALEPS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst,imm8</args>
			<opc openc="FV">EVEX.128.66.0F3A.W0 08 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rounds packed single-precision floating point values in xmm2/m128/m32bcst to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRNDSCALEPS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst,imm8</args>
			<opc openc="FV">EVEX.256.66.0F3A.W0 08 /r ib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rounds packed single-precision floating point values in ymm2/m256/m32bcst to a number of fraction bits specified by the imm8 field. Stores the result in ymm1 register. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRNDSCALEPS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst{sae},imm8</args>
			<opc openc="FV">EVEX.512.66.0F3A.W0 08 /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rounds packed single-precision floating-point values in zmm2/m512/m32bcst to a number of fraction bits specified by the imm8 field. Stores the result in zmm1 register using writemask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRNDSCALESS: round a scalar single-precision value to a number of fraction bits given
		by imm8 (EVEX.LIG, W0, opcode 0A in the 0F3A map). The opcode string ends in "ib" and
		args lists four operands, so the T1S operand encoding records Imm8 as oprnd4.
	-->
	<common>
		<brief>VRNDSCALESS--Round Scalar Float32 Value To Include A Given Number Of Fraction Bits.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRNDSCALESS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{sae},imm8</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F3A.W0 0A /r ib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Rounds scalar single-precision floating-point value in xmm3/m32 to a number of fraction bits specified by the imm8 field. Stores the result in xmm1 register under writemask.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>Imm8</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRSQRT14PD: approximate reciprocal square roots of packed double-precision values
		(EVEX.W1, opcode 4E in the 0F38 map). Two-operand FV encoding: destination in
		ModRM:reg, source in ModRM:r/m with an optional m64bcst broadcast. The 128-bit and
		256-bit forms also list AVX512VL.
	-->
	<common>
		<brief>VRSQRT14PD--Compute Approximate Reciprocals of Square Roots of Packed Float64 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT14PD</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m64bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W1 4E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal square roots of the packed double-precision floating-point values in xmm2/m128/m64bcst and stores the results in xmm1. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT14PD</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m64bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W1 4E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal square roots of the packed double-precision floating-point values in ymm2/m256/m64bcst and stores the results in ymm1. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT14PD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst</args>
			<opc openc="FV">EVEX.512.66.0F38.W1 4E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal square roots of the packed double-precision floating-point values in zmm2/m512/m64bcst and stores the results in zmm1 under writemask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRSQRT14SD: approximate reciprocal square root of a scalar double-precision value
		(EVEX.LIG, W1, opcode 4F in the 0F38 map). Three-operand T1S encoding: destination in
		ModRM:reg, first source in EVEX.vvvv, second source in ModRM:r/m.
	-->
	<common>
		<brief>VRSQRT14SD--Compute Approximate Reciprocal of Square Root of Scalar Float64 Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT14SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W1 4F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal square root of the scalar double-precision floating-point value in xmm3/m64 and stores the result in the low quadword element of xmm1 using writemask k1. Bits[127:64] of xmm2 is copied to xmm1[127:64].</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRSQRT14PS: approximate reciprocal square roots of packed single-precision values
		(EVEX.W0, opcode 4E in the 0F38 map). Two-operand FV encoding: destination in
		ModRM:reg, source in ModRM:r/m with an optional m32bcst broadcast. The 128-bit and
		256-bit forms also list AVX512VL.
	-->
	<common>
		<brief>VRSQRT14PS--Compute Approximate Reciprocals of Square Roots of Packed Float32 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT14PS</mnem>
			<args>xmm1 {k1}{z},xmm2/m128/m32bcst</args>
			<opc openc="FV">EVEX.128.66.0F38.W0 4E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal square roots of the packed single-precision floating-point values in xmm2/m128/m32bcst and stores the results in xmm1. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT14PS</mnem>
			<args>ymm1 {k1}{z},ymm2/m256/m32bcst</args>
			<opc openc="FV">EVEX.256.66.0F38.W0 4E /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal square roots of the packed single-precision floating-point values in ymm2/m256/m32bcst and stores the results in ymm1. Under writemask.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT14PS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst</args>
			<opc openc="FV">EVEX.512.66.0F38.W0 4E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal square roots of the packed single-precision floating-point values in zmm2/m512/m32bcst and stores the results in zmm1. Under writemask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VRSQRT14SS: approximate reciprocal square root of a scalar single-precision value
		(EVEX.LIG, W0, opcode 4F in the 0F38 map). The instruction is EVEX-encoded, so the
		first source register is carried in EVEX.vvvv and is read-only, written here in the
		same "EVEX.vvvv(r)" form used by the other T1S records.
	-->
	<common>
		<brief>VRSQRT14SS--Compute Approximate Reciprocal of Square Root of Scalar Float32 Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT14SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W0 4F /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal square root of the scalar single-precision floating-point value in xmm3/m32 and stores the result in the low doubleword element of xmm1 using writemask k1. Bits[127:32] of xmm2 is copied to xmm1[127:32].</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VSCALEFPD: scale packed double-precision values in the first source by values from
		the second source (EVEX.W1, opcode 2C in the 0F38 map, three-operand FV encoding).
		The 128-bit and 256-bit forms also list AVX512VL; only the 512-bit form carries the
		{er} modifier.
	-->
	<common>
		<brief>VSCALEFPD--Scale Packed Float64 Values With Float64 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VSCALEFPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W1 2C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Scale the packed double-precision floating-point values in xmm2 using values from xmm3/m128/m64bcst. Under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCALEFPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W1 2C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Scale the packed double-precision floating-point values in ymm2 using values from ymm3/m256/m64bcst. Under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCALEFPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W1 2C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Scale the packed double-precision floating-point values in zmm2 using values from zmm3/m512/m64bcst. Under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VSCALEFSD: scale a scalar double-precision value in the first source by the value
		from the second source (EVEX.LIG, W1, opcode 2D in the 0F38 map, three-operand T1S
		encoding). The second source carries the {er} modifier.
	-->
	<common>
		<brief>VSCALEFSD--Scale Scalar Float64 Values With Float64 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VSCALEFSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W1 2D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Scale the scalar double-precision floating-point values in xmm2 using the value from xmm3/m64. Under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<!--
		VSCALEFPS: scale packed single-precision values in the first source by values from
		the second source (EVEX.W0, opcode 2C in the 0F38 map, three-operand FV encoding).
		The 128-bit and 256-bit forms also list AVX512VL; only the 512-bit form carries the
		{er} modifier.
	-->
	<common>
		<brief>VSCALEFPS--Scale Packed Float32 Values With Float32 Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VSCALEFPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F38.W0 2C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Scale the packed single-precision floating-point values in xmm2 using values from xmm3/m128/m32bcst. Under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCALEFPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F38.W0 2C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Scale the packed single-precision values in ymm2 using floating point values from ymm3/m256/m32bcst. Under writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCALEFPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F38.W0 2C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Scale the packed single-precision floating-point values in zmm2 using floating-point values from zmm3/m512/m32bcst. Under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VSCALEFSS group: a single EVEX-encoded scalar form (openc T1S) gated on the AVX512F flag.
		     Operand roles for that form are listed in the T1S oprndenc entry at the end of the group. -->
		<brief>VSCALEFSS--Scale Scalar Float32 Value With Float32 Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VSCALEFSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W0 2D /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Scale the scalar single-precision floating-point value in xmm2 using floating-point value from xmm3/m32. Under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VSCATTER group: twelve EVEX forms, all openc T1S.
		     VSCATTERDPS/VSCATTERDPD use opcode A2 with dword indices (vm32x/y/z);
		     VSCATTERQPS/VSCATTERQPD use opcode A3 with qword indices (vm64x/y/z).
		     W0 selects the single-precision variants and W1 the double-precision ones.
		     Per the oprndenc entry, operand 1 is the VSIB memory operand and operand 2 is
		     the register that is read.
		     NOTE(review): in the DPD and QPS forms the index-vector width and the data-register
		     width differ (e.g. vm32x with ymm1, vm64y with xmm1). This looks deliberate, since
		     the index size and the element size differ for those variants; confirm against the
		     Intel reference before "normalizing" these args. -->
		<brief>VSCATTERDPS/VSCATTERDPD/VSCATTERQPS/VSCATTERQPD--Scatter Packed Single, Packed Double with Signed Dword and Qword Indices.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERDPS</mnem>
			<args>vm32x {k1},xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 A2 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter single-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERDPS</mnem>
			<args>vm32y {k1},ymm1</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 A2 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter single-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERDPS</mnem>
			<args>vm32z {k1},zmm1</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 A2 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter single-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERDPD</mnem>
			<args>vm32x {k1},xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 A2 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter double-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERDPD</mnem>
			<args>vm32x {k1},ymm1</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 A2 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter double-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERDPD</mnem>
			<args>vm32y {k1},zmm1</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 A2 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed dword indices, scatter double-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERQPS</mnem>
			<args>vm64x {k1},xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F38.W0 A3 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter single-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERQPS</mnem>
			<args>vm64y {k1},xmm1</args>
			<opc openc="T1S">EVEX.256.66.0F38.W0 A3 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter single-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERQPS</mnem>
			<args>vm64z {k1},ymm1</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 A3 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter single-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERQPD</mnem>
			<args>vm64x {k1},xmm1</args>
			<opc openc="T1S">EVEX.128.66.0F38.W1 A3 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter double-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERQPD</mnem>
			<args>vm64y {k1},ymm1</args>
			<opc openc="T1S">EVEX.256.66.0F38.W1 A3 /vsib</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter double-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERQPD</mnem>
			<args>vm64z {k1},zmm1</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 A3 /vsib</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Using signed qword indices, scatter double-precision floating-point values to memory using writemask k1.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>BaseReg(R): VSIB:base,VectorReg(R): VSIB:index</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- SUBPD group: one legacy SSE2 form (openc RM), two VEX forms (openc RVM) and three
		     EVEX forms (openc FV), all on opcode 5C with the 66 prefix.
		     In every form the r/m operand is subtracted from the other source; the EVEX
		     descriptions are worded "in <r/m> from <src1>" so they agree with the VEX ones. -->
		<brief>SUBPD--Subtract Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SUBPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 5C /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 5C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed double-precision floating-point values in xmm3/mem from xmm2 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 5C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed double-precision floating-point values in ymm3/mem from ymm2 and store result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 5C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed double-precision floating-point values in xmm3/m128/m64bcst from xmm2 and store result in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 5C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed double-precision floating-point values in ymm3/m256/m64bcst from ymm2 and store result in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 5C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed double-precision floating-point values in zmm3/m512/m64bcst from zmm2 and store result in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- SUBPS group: one legacy SSE form (openc RM), two VEX forms (openc RVM) and three
		     EVEX forms (openc FV), all on opcode 5C with no mandatory prefix.
		     In every form the r/m operand is subtracted from the other source; all EVEX
		     descriptions use the same "in <r/m> from <src1>" wording as the 512-bit entry. -->
		<brief>SUBPS--Subtract Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SUBPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 5C /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 5C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed single-precision floating-point values in xmm3/mem from xmm2 and stores result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F.WIG 5C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract packed single-precision floating-point values in ymm3/mem from ymm2 and stores result in ymm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 5C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed single-precision floating-point values in xmm3/m128/m32bcst from xmm2 and stores result in xmm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 5C /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed single-precision floating-point values in ymm3/m256/m32bcst from ymm2 and stores result in ymm1 with writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst{er}</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 5C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract packed single-precision floating-point values in zmm3/m512/m32bcst from zmm2 and stores result in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- SUBSD group: one legacy SSE2 form (openc RM), one VEX form (openc RVM) and one
		     EVEX form (openc T1S), all on opcode 5C with the F2 prefix.
		     Each openc value used by an ins entry has a matching oprndenc entry below. -->
		<brief>SUBSD--Subtract Scalar Double-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SUBSD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">F2 0F 5C /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Subtract the low double-precision floating-point value in xmm2/m64 from xmm1 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBSD</mnem>
			<args>xmm1,xmm2,xmm3/m64</args>
			<opc openc="RVM">VEX.NDS.128.F2.0F.WIG 5C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract the low double-precision floating-point value in xmm3/m64 from xmm2 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBSD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F2.0F.W1 5C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract the low double-precision floating-point value in xmm3/m64 from xmm2 and store the result in xmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- SUBSS group: one legacy SSE form (openc RM), one VEX form (openc RVM) and one
		     EVEX form (openc T1S), all on opcode 5C with the F3 prefix.
		     Each openc value used by an ins entry has a matching oprndenc entry below. -->
		<brief>SUBSS--Subtract Scalar Single-Precision Floating-Point Value.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SUBSS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">F3 0F 5C /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Subtract the low single-precision floating-point value in xmm2/m32 from xmm1 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBSS</mnem>
			<args>xmm1,xmm2,xmm3/m32</args>
			<opc openc="RVM">VEX.NDS.128.F3.0F.WIG 5C /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Subtract the low single-precision floating-point value in xmm3/m32 from xmm2 and store the result in xmm1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSUBSS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32{er}</args>
			<opc openc="T1S">EVEX.NDS.LIG.F3.0F.W0 5C /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Subtract the low single-precision floating-point value in xmm3/m32 from xmm2 and store the result in xmm1 under writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- UCOMISD group: one legacy SSE2 form and one VEX form (both openc RM) plus one
		     EVEX form (openc T1S), all on opcode 2E with the 66 prefix.
		     Every form only compares its two operands and reports the result in EFLAGS, so
		     operand 1 is marked read-only (r) in both oprndenc entries.
		     NOTE(review): the T1S operand 1 was previously (w); if strict fidelity to the
		     Intel table is required, check the reference document. -->
		<brief>UCOMISD--Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS.</brief>
		<ins x32m="V" x64m="V">
			<mnem>UCOMISD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">66 0F 2E /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUCOMISD</mnem>
			<args>xmm1,xmm2/m64</args>
			<opc openc="RM">VEX.128.66.0F.WIG 2E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare low double-precision floating-point values in xmm1 and xmm2/mem64 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUCOMISD</mnem>
			<args>xmm1,xmm2/m64{sae}</args>
			<opc openc="T1S">EVEX.LIG.66.0F.W1 2E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare low double-precision floating-point values in xmm1 and xmm2/m64 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- UCOMISS group: one legacy SSE form and one VEX form (both openc RM) plus one
		     EVEX form (openc T1S), all on opcode 2E with no mandatory prefix.
		     Every form only compares its two operands and reports the result in EFLAGS, so
		     operand 1 is marked read-only (r) in both oprndenc entries.
		     NOTE(review): the T1S operand 1 was previously (w); if strict fidelity to the
		     Intel table is required, check the reference document. -->
		<brief>UCOMISS--Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS.</brief>
		<ins x32m="V" x64m="V">
			<mnem>UCOMISS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">0F 2E /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUCOMISS</mnem>
			<args>xmm1,xmm2/m32</args>
			<opc openc="RM">VEX.128.0F.WIG 2E /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUCOMISS</mnem>
			<args>xmm1,xmm2/m32{sae}</args>
			<opc openc="T1S">EVEX.LIG.0F.W0 2E /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Compare low single-precision floating-point values in xmm1 and xmm2/mem32 and set the EFLAGS flags accordingly.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(r)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- UNPCKHPD group: one legacy SSE2 form (openc RM), two VEX forms (openc RVM) and
		     three EVEX forms (openc FV), all on opcode 15 with the 66 prefix.
		     The 128- and 256-bit EVEX forms list AVX512VL in addition to AVX512F. -->
		<brief>UNPCKHPD--Unpack and Interleave High Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>UNPCKHPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 15 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double-precision floating-point values from high quadwords of xmm1 and xmm2/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKHPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 15 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double-precision floating-point values from high quadwords of xmm2 and xmm3/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKHPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 15 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double-precision floating-point values from high quadwords of ymm2 and ymm3/m256.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKHPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 15 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double precision floating-point values from high quadwords of xmm2 and xmm3/m128/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKHPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 15 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double precision floating-point values from high quadwords of ymm2 and ymm3/m256/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKHPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 15 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double-precision floating-point values from high quadwords of zmm2 and zmm3/m512/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- UNPCKHPS group: one legacy SSE form (openc RM), two VEX forms (openc RVM) and
		     three EVEX forms (openc FV), all on opcode 15 with no mandatory prefix.
		     The 128- and 256-bit EVEX forms list AVX512VL in addition to AVX512F. -->
		<brief>UNPCKHPS--Unpack and Interleave High Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>UNPCKHPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 15 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm1 and xmm2/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKHPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 15 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm2 and xmm3/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKHPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F.WIG 15 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from high quadwords of ymm2 and ymm3/m256.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKHPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 15 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from high quadwords of xmm2 and xmm3/m128/m32bcst and write result to xmm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKHPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 15 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from high quadwords of ymm2 and ymm3/m256/m32bcst and write result to ymm1 subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKHPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 15 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from high quadwords of zmm2 and zmm3/m512/m32bcst and write result to zmm1 subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- UNPCKLPD group: one legacy SSE2 form (openc RM), two VEX forms (openc RVM) and
		     three EVEX forms (openc FV), all on opcode 14 with the 66 prefix.
		     The 128- and 256-bit EVEX forms list AVX512VL in addition to AVX512F. -->
		<brief>UNPCKLPD--Unpack and Interleave Low Packed Double-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>UNPCKLPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 14 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double-precision floating-point values from low quadwords of xmm1 and xmm2/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKLPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 14 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double-precision floating-point values from low quadwords of xmm2 and xmm3/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKLPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 14 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double-precision floating-point values from low quadwords of ymm2 and ymm3/m256.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKLPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 14 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double precision floating-point values from low quadwords of xmm2 and xmm3/m128/m64bcst subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKLPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 14 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double precision floating-point values from low quadwords of ymm2 and ymm3/m256/m64bcst subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKLPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 14 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves double-precision floating-point values from low quadwords of zmm2 and zmm3/m512/m64bcst subject to write mask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- UNPCKLPS group: one legacy SSE form (openc RM), two VEX forms (openc RVM) and
		     three EVEX forms (openc FV), all on opcode 14 with no mandatory prefix.
		     In every ins entry, mnem holds only the mnemonic, args holds the operand list and
		     opc holds only the encoding string; keep the VEX.256 entry in that same shape. -->
		<brief>UNPCKLPS--Unpack and Interleave Low Packed Single-Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>UNPCKLPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 14 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm1 and xmm2/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKLPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 14 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm2 and xmm3/m128.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKLPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F.WIG 14 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from low quadwords of ymm2 and ymm3/m256.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKLPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 14 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from low quadwords of xmm2 and xmm3/mem and write result to xmm1 subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKLPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 14 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from low quadwords of ymm2 and ymm3/mem and write result to ymm1 subject to write mask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VUNPCKLPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 14 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpacks and Interleaves single-precision floating-point values from low quadwords of zmm2 and zmm3/m512/m32bcst and write result to zmm1 subject to write mask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- XORPD group: one legacy SSE2 form (openc RM), two VEX forms (openc RVM) and three
		     EVEX forms (openc FV), all on opcode 57 with the 66 prefix.
		     The EVEX forms are gated on AVX512DQ (plus AVX512VL for the 128- and 256-bit forms).
		     Opcode strings keep a space before the /r token, as in the rest of the file. -->
		<brief>XORPD--Bitwise Logical XOR of Packed Double Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>XORPD</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">66 0F 57 /r</opc>
			<cpuid>
				<flag>SSE2</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed double-precision floating-point values in xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VXORPD</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.66.0F.WIG 57 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed double-precision floating-point values in xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VXORPD</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.66.0F.WIG 57 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed double-precision floating-point values in ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VXORPD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m64bcst</args>
			<opc openc="FV">EVEX.NDS.128.66.0F.W1 57 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed double-precision floating-point values in xmm2 and xmm3/m128/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VXORPD</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m64bcst</args>
			<opc openc="FV">EVEX.NDS.256.66.0F.W1 57 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed double-precision floating-point values in ymm2 and ymm3/m256/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VXORPD</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m64bcst</args>
			<opc openc="FV">EVEX.NDS.512.66.0F.W1 57 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed double-precision floating-point values in zmm2 and zmm3/m512/m64bcst subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- XORPS group: one legacy SSE form (openc RM), two VEX forms (openc RVM) and three
		     EVEX forms (openc FV), all on opcode 57 with no mandatory prefix.
		     The EVEX forms are gated on AVX512DQ (plus AVX512VL for the 128- and 256-bit forms).
		     The vvvv-encoded operand carries the (r) access annotation, as in the other
		     RVM/FV oprndenc entries of this file. -->
		<brief>XORPS--Bitwise Logical XOR of Packed Single Precision Floating-Point Values.</brief>
		<ins x32m="V" x64m="V">
			<mnem>XORPS</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 57 /r</opc>
			<cpuid>
				<flag>SSE</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed single-precision floating-point values in xmm1 and xmm2/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VXORPS</mnem>
			<args>xmm1,xmm2,xmm3/m128</args>
			<opc openc="RVM">VEX.NDS.128.0F.WIG 57 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed single-precision floating-point values in xmm2 and xmm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VXORPS</mnem>
			<args>ymm1,ymm2,ymm3/m256</args>
			<opc openc="RVM">VEX.NDS.256.0F.WIG 57 /r</opc>
			<cpuid>
				<flag>AVX</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed single-precision floating-point values in ymm2 and ymm3/mem.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VXORPS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m128/m32bcst</args>
			<opc openc="FV">EVEX.NDS.128.0F.W0 57 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed single-precision floating-point values in xmm2 and xmm3/m128/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VXORPS</mnem>
			<args>ymm1 {k1}{z},ymm2,ymm3/m256/m32bcst</args>
			<opc openc="FV">EVEX.NDS.256.0F.W0 57 /r</opc>
			<cpuid>
				<flag>AVX512VL</flag>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed single-precision floating-point values in ymm2 and ymm3/m256/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VXORPS</mnem>
			<args>zmm1 {k1}{z},zmm2,zmm3/m512/m32bcst</args>
			<opc openc="FV">EVEX.NDS.512.0F.W0 57 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Return the bitwise logical XOR of packed single-precision floating-point values in zmm2 and zmm3/m512/m32bcst subject to writemask k1.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RVM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KADD group: four VEX.L1 forms (W/B/Q/D, described as adding 16/8/64/32 bit masks). All share opcode byte 4A and the RVR operand encoding defined at the end of this group. -->
		<brief>KADDW/KADDB/KADDQ/KADDD--ADD Two Masks.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KADDW</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.0F.W0 4A /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Add 16 bits masks in k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KADDB</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W0 4A /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Add 8 bits masks in k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KADDQ</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.0F.W1 4A /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add 64 bits masks in k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KADDD</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W1 4A /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Add 32 bits masks in k2 and k3 and place result in k1.</dscrp>
		</ins>
		<oprndenc openc="RVR">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.1vvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KAND group: four VEX.L1 forms (W/B/Q/D, described as ANDing 16/8/64/32 bit masks). All share opcode byte 41 and the RVR operand encoding defined at the end of this group. -->
		<brief>KANDW/KANDB/KANDQ/KANDD--Bitwise Logical AND Masks.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KANDW</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.NDS.L1.0F.W0 41 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND 16 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KANDB</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W0 41 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Bitwise AND 8 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KANDQ</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.0F.W1 41 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise AND 64 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KANDD</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W1 41 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise AND 32 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<oprndenc openc="RVR">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.1vvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KANDN group: four VEX.L1 forms (W/B/Q/D, described as AND NOT of 16/8/64/32 bit masks). All take k2 and k3 as sources and k1 as destination, share opcode byte 42 and use the RVR operand encoding defined at the end of this group. -->
		<brief>KANDNW/KANDNB/KANDNQ/KANDND--Bitwise Logical AND NOT Masks.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KANDNW</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.NDS.L1.0F.W0 42 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT 16 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KANDNB</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W0 42 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT 8 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KANDNQ</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.0F.W1 42 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT 64 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KANDND</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W1 42 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise AND NOT 32 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<oprndenc openc="RVR">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.1vvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KMOV group: sixteen VEX.L0 forms in four sets of W/B/Q/D. Opcode 90 (RM) loads k1 from k2 or memory, opcode 91 (MR) stores k1 to memory, opcode 92 (RR) moves a general-purpose register into k1, opcode 93 (RR) moves k1 into a general-purpose register. The two r64 forms are the only ones marked x32m="I". -->
		<brief>KMOVW/KMOVB/KMOVQ/KMOVD--Move from and to Mask Registers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KMOVW</mnem>
			<args>k1,k2/m16</args>
			<opc openc="RM">VEX.L0.0F.W0 90 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move 16 bits mask from k2/m16 and store the result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVB</mnem>
			<args>k1,k2/m8</args>
			<opc openc="RM">VEX.L0.66.0F.W0 90 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Move 8 bits mask from k2/m8 and store the result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVQ</mnem>
			<args>k1,k2/m64</args>
			<opc openc="RM">VEX.L0.0F.W1 90 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move 64 bits mask from k2/m64 and store the result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVD</mnem>
			<args>k1,k2/m32</args>
			<opc openc="RM">VEX.L0.66.0F.W1 90 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move 32 bits mask from k2/m32 and store the result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVW</mnem>
			<args>m16,k1</args>
			<opc openc="MR">VEX.L0.0F.W0 91 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move 16 bits mask from k1 and store the result in m16.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVB</mnem>
			<args>m8,k1</args>
			<opc openc="MR">VEX.L0.66.0F.W0 91 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Move 8 bits mask from k1 and store the result in m8.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVQ</mnem>
			<args>m64,k1</args>
			<opc openc="MR">VEX.L0.0F.W1 91 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move 64 bits mask from k1 and store the result in m64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVD</mnem>
			<args>m32,k1</args>
			<opc openc="MR">VEX.L0.66.0F.W1 91 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move 32 bits mask from k1 and store the result in m32.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVW</mnem>
			<args>k1,r32</args>
			<opc openc="RR">VEX.L0.0F.W0 92 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move 16 bits mask from r32 to k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVB</mnem>
			<args>k1,r32</args>
			<opc openc="RR">VEX.L0.66.0F.W0 92 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Move 8 bits mask from r32 to k1.</dscrp>
		</ins>
		<ins x32m="I" x64m="V">
			<mnem>KMOVQ</mnem>
			<args>k1,r64</args>
			<opc openc="RR">VEX.L0.F2.0F.W1 92 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move 64 bits mask from r64 to k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVD</mnem>
			<args>k1,r32</args>
			<opc openc="RR">VEX.L0.F2.0F.W0 92 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move 32 bits mask from r32 to k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVW</mnem>
			<args>r32,k1</args>
			<opc openc="RR">VEX.L0.0F.W0 93 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Move 16 bits mask from k1 to r32.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVB</mnem>
			<args>r32,k1</args>
			<opc openc="RR">VEX.L0.66.0F.W0 93 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Move 8 bits mask from k1 to r32.</dscrp>
		</ins>
		<ins x32m="I" x64m="V">
			<mnem>KMOVQ</mnem>
			<args>r64,k1</args>
			<opc openc="RR">VEX.L0.F2.0F.W1 93 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move 64 bits mask from k1 to r64.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KMOVD</mnem>
			<args>r32,k1</args>
			<opc openc="RR">VEX.L0.F2.0F.W0 93 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Move 32 bits mask from k1 to r32.</dscrp>
		</ins>
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="MR">
			<oprnd1>ModRM:r/m(w, ModRM:[7:6] must not be 11b)</oprnd1>
			<oprnd2>ModRM:reg(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
		<oprndenc openc="RR">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KUNPCK group: three VEX.NDS.L1 forms (BW, WD, DQ) that interleave two mask sources into a result of twice the width. All share opcode byte 4B and the RVR operand encoding defined at the end of this group. -->
		<brief>KUNPCKBW/KUNPCKWD/KUNPCKDQ--Unpack for Mask Registers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KUNPCKBW</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.NDS.L1.66.0F.W0 4B /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Unpack and interleave 8 bits masks in k2 and k3 and write word result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KUNPCKWD</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.NDS.L1.0F.W0 4B /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Unpack and interleave 16 bits in k2 and k3 and write doubleword result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KUNPCKDQ</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.NDS.L1.0F.W1 4B /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Unpack and interleave 32 bits masks in k2 and k3 and write quadword result in k1.</dscrp>
		</ins>
		<oprndenc openc="RVR">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.1vvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KNOT group: four VEX.L0 two-operand forms (W/B/Q/D, described as NOT of a 16/8/64/32 bit mask). All share opcode byte 44 and the RR operand encoding defined at the end of this group. -->
		<brief>KNOTW/KNOTB/KNOTQ/KNOTD--NOT Mask Register.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KNOTW</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.0F.W0 44 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise NOT of 16 bits mask k2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KNOTB</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.66.0F.W0 44 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Bitwise NOT of 8 bits mask k2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KNOTQ</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.0F.W1 44 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise NOT of 64 bits mask k2.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KNOTD</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.66.0F.W1 44 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise NOT of 32 bits mask k2.</dscrp>
		</ins>
		<oprndenc openc="RR">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KOR group: four VEX.L1 forms (W/B/Q/D, described as ORing 16/8/64/32 bit masks). All share opcode byte 45 and the RVR operand encoding defined at the end of this group. -->
		<brief>KORW/KORB/KORQ/KORD--Bitwise Logical OR Masks.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KORW</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.NDS.L1.0F.W0 45 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise OR 16 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KORB</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W0 45 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Bitwise OR 8 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KORQ</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.0F.W1 45 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise OR 64 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KORD</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W1 45 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise OR 32 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<oprndenc openc="RVR">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.1vvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KORTEST group: four VEX.L0 two-operand forms (W/B/Q/D) described as ORing k1 and k2 and updating ZF and CF. All share opcode byte 98 and the RR operand encoding defined at the end of this group. -->
		<brief>KORTESTW/KORTESTB/KORTESTQ/KORTESTD--OR Masks And Set Flags.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KORTESTW</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.0F.W0 98 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise OR 16 bits masks k1 and k2 and update ZF and CF accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KORTESTB</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.66.0F.W0 98 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Bitwise OR 8 bits masks k1 and k2 and update ZF and CF accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KORTESTQ</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.0F.W1 98 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise OR 64 bits masks k1 and k2 and update ZF and CF accordingly.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KORTESTD</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.66.0F.W1 98 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise OR 32 bits masks k1 and k2 and update ZF and CF accordingly.</dscrp>
		</ins>
		<oprndenc openc="RR">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KSHIFTL group: four VEX.L0.66.0F3A forms taking an imm8 shift count. Opcode byte 32 covers W (W1) and B (W0); opcode byte 33 covers Q (W1) and D (W0). All use the RRI operand encoding defined at the end of this group. -->
		<brief>KSHIFTLW/KSHIFTLB/KSHIFTLQ/KSHIFTLD--Shift Left Mask Registers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KSHIFTLW</mnem>
			<args>k1,k2,imm8</args>
			<opc openc="RRI">VEX.L0.66.0F3A.W1 32 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift left 16 bits in k2 by immediate and write result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KSHIFTLB</mnem>
			<args>k1,k2,imm8</args>
			<opc openc="RRI">VEX.L0.66.0F3A.W0 32 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Shift left 8 bits in k2 by immediate and write result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KSHIFTLQ</mnem>
			<args>k1,k2,imm8</args>
			<opc openc="RRI">VEX.L0.66.0F3A.W1 33 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift left 64 bits in k2 by immediate and write result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KSHIFTLD</mnem>
			<args>k1,k2,imm8</args>
			<opc openc="RRI">VEX.L0.66.0F3A.W0 33 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift left 32 bits in k2 by immediate and write result in k1.</dscrp>
		</ins>
		<oprndenc openc="RRI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KSHIFTR group: four VEX.L0.66.0F3A forms taking an imm8 shift count. Opcode byte 30 covers W (W1) and B (W0); opcode byte 31 covers Q (W1) and D (W0). All use the RRI operand encoding defined at the end of this group. -->
		<brief>KSHIFTRW/KSHIFTRB/KSHIFTRQ/KSHIFTRD--Shift Right Mask Registers.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KSHIFTRW</mnem>
			<args>k1,k2,imm8</args>
			<opc openc="RRI">VEX.L0.66.0F3A.W1 30 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Shift right 16 bits in k2 by immediate and write result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KSHIFTRB</mnem>
			<args>k1,k2,imm8</args>
			<opc openc="RRI">VEX.L0.66.0F3A.W0 30 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Shift right 8 bits in k2 by immediate and write result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KSHIFTRQ</mnem>
			<args>k1,k2,imm8</args>
			<opc openc="RRI">VEX.L0.66.0F3A.W1 31 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift right 64 bits in k2 by immediate and write result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KSHIFTRD</mnem>
			<args>k1,k2,imm8</args>
			<opc openc="RRI">VEX.L0.66.0F3A.W0 31 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Shift right 32 bits in k2 by immediate and write result in k1.</dscrp>
		</ins>
		<oprndenc openc="RRI">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KXNOR group: four VEX.L1 forms (W/B/Q/D, described as XNOR of 16/8/64/32 bit masks). All share opcode byte 46 and the RVR operand encoding defined at the end of this group. -->
		<brief>KXNORW/KXNORB/KXNORQ/KXNORD--Bitwise Logical XNOR Masks.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KXNORW</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.NDS.L1.0F.W0 46 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise XNOR 16 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KXNORB</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W0 46 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Bitwise XNOR 8 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KXNORQ</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.0F.W1 46 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise XNOR 64 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KXNORD</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W1 46 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise XNOR 32 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<oprndenc openc="RVR">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.1vvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KTEST group: four VEX.L0 two-operand forms (W/B/Q/D) that only set ZF and CF; both mask operands are read, as the RR operand encoding at the end of this group shows. All share opcode byte 99. Each ins carries the mnemonic in mnem, the operands in args and openc="RR" on opc so that it resolves to that oprndenc entry. -->
		<brief>KTESTW/KTESTB/KTESTQ/KTESTD--Packed Bit Test Masks and Set Flags.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KTESTW</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.0F.W0 99 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Set ZF and CF depending on sign bit AND and ANDN of 16 bits mask register sources.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KTESTB</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.66.0F.W0 99 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Set ZF and CF depending on sign bit AND and ANDN of 8 bits mask register sources.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KTESTQ</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.0F.W1 99 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Set ZF and CF depending on sign bit AND and ANDN of 64 bits mask register sources.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KTESTD</mnem>
			<args>k1,k2</args>
			<opc openc="RR">VEX.L0.66.0F.W1 99 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Set ZF and CF depending on sign bit AND and ANDN of 32 bits mask register sources.</dscrp>
		</ins>
		<oprndenc openc="RR">
			<oprnd1>ModRM:reg(r)</oprnd1>
			<oprnd2>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- KXOR group: four VEX.L1 forms (W/B/Q/D, described as XOR of 16/8/64/32 bit masks). All share opcode byte 47 and the RVR operand encoding defined at the end of this group. -->
		<brief>KXORW/KXORB/KXORQ/KXORD--Bitwise Logical XOR Masks.</brief>
		<ins x32m="V" x64m="V">
			<mnem>KXORW</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.NDS.L1.0F.W0 47 /r</opc>
			<cpuid>
				<flag>AVX512F</flag>
			</cpuid>
			<dscrp>Bitwise XOR 16 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KXORB</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W0 47 /r</opc>
			<cpuid>
				<flag>AVX512DQ</flag>
			</cpuid>
			<dscrp>Bitwise XOR 8 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KXORQ</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.0F.W1 47 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise XOR 64 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>KXORD</mnem>
			<args>k1,k2,k3</args>
			<opc openc="RVR">VEX.L1.66.0F.W1 47 /r</opc>
			<cpuid>
				<flag>AVX512BW</flag>
			</cpuid>
			<dscrp>Bitwise XOR 32 bits masks k2 and k3 and place result in k1.</dscrp>
		</ins>
		<oprndenc openc="RVR">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>VEX.1vvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r, ModRM:[7:6] must be 11b)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
</full_update_placeholder_removed>
	<common>
		<!-- VEXP2PD: single EVEX.512 form (W1, opcode byte C8) flagged AVX512ER, with a zmm destination under writemask and a zmm/m512/m64bcst source. Uses the two-operand FV encoding defined below. -->
		<brief>VEXP2PD--Approximation to the Exponential 2^x of Packed Double-Precision Floating-Point Values with Less Than 2^-23 Relative Error.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VEXP2PD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst {sae}</args>
			<opc openc="FV">EVEX.512.66.0F38.W1 C8 /r</opc>
			<cpuid>
				<flag>AVX512ER</flag>
			</cpuid>
			<dscrp>Computes approximations to the exponential 2^x (with less than 2^-23 of maximum relative error) of the packed double-precision floating-point values from zmm2/m512/m64bcst and stores the floating-point result in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VEXP2PS: single EVEX.512 form (W0, opcode byte C8) flagged AVX512ER, with a zmm destination under writemask and a zmm/m512/m32bcst source. Uses the two-operand FV encoding defined below. -->
		<brief>VEXP2PS--Approximation to the Exponential 2^x of Packed Single-Precision Floating-Point Values with Less Than 2^-23 Relative Error.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VEXP2PS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst {sae}</args>
			<opc openc="FV">EVEX.512.66.0F38.W0 C8 /r</opc>
			<cpuid>
				<flag>AVX512ER</flag>
			</cpuid>
			<dscrp>Computes approximations to the exponential 2^x (with less than 2^-23 of maximum relative error) of the packed single-precision floating-point values from zmm2/m512/m32bcst and stores the floating-point result in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VRCP28PD: single EVEX.512 form (W1, opcode byte CA) flagged AVX512ER, with a zmm destination under writemask and a zmm/m512/m64bcst source. Uses the two-operand FV encoding defined below. -->
		<brief>VRCP28PD--Approximation to the Reciprocal of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRCP28PD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst {sae}</args>
			<opc openc="FV">EVEX.512.66.0F38.W1 CA /r</opc>
			<cpuid>
				<flag>AVX512ER</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocals ( &lt; 2^-28 relative error) of the packed double-precision floating-point values in zmm2/m512/m64bcst and stores the results in zmm1. Under writemask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VRCP28SD: single EVEX.NDS.LIG scalar form (W1, opcode byte CB) flagged AVX512ER. Three operands: xmm1 destination under writemask, xmm2 supplying the upper bits, xmm3/m64 source. Uses the T1S encoding defined below. -->
		<brief>VRCP28SD--Approximation to the Reciprocal of Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRCP28SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64 {sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W1 CB /r</opc>
			<cpuid>
				<flag>AVX512ER</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal ( &lt; 2^-28 relative error) of the scalar double-precision floating-point value in xmm3/m64 and stores the results in xmm1. Under writemask. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64].</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VRCP28PS: single EVEX.512 form (W0, opcode byte CA) flagged AVX512ER, with a zmm destination under writemask and a zmm/m512/m32bcst source. Uses the two-operand FV encoding defined below. -->
		<brief>VRCP28PS--Approximation to the Reciprocal of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRCP28PS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst {sae}</args>
			<opc openc="FV">EVEX.512.66.0F38.W0 CA /r</opc>
			<cpuid>
				<flag>AVX512ER</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocals ( &lt; 2^-28 relative error) of the packed single-precision floating-point values in zmm2/m512/m32bcst and stores the results in zmm1. Under writemask.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VRCP28SS: single EVEX.NDS.LIG scalar form (W0, opcode byte CB) flagged AVX512ER. Three operands: xmm1 destination under writemask, xmm2 supplying the upper bits, xmm3/m32 source. Uses the T1S encoding defined below. -->
		<brief>VRCP28SS--Approximation to the Reciprocal of Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRCP28SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32 {sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W0 CB /r</opc>
			<cpuid>
				<flag>AVX512ER</flag>
			</cpuid>
			<dscrp>Computes the approximate reciprocal ( &lt; 2^-28 relative error) of the scalar single-precision floating-point value in xmm3/m32 and stores the results in xmm1. Under writemask. Also, upper 3 single-precision floating-point values (bits[127:32]) from xmm2 is copied to xmm1[127:32].</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VRSQRT28PD: single EVEX.512 form (W1, opcode byte CC) flagged AVX512ER, with a zmm destination under writemask and a zmm/m512/m64bcst source. Uses the two-operand FV encoding defined below. The error bound in dscrp is 2^-28, matching the brief. -->
		<brief>VRSQRT28PD--Approximation to the Reciprocal Square Root of Packed Double-Precision Floating-Point Values with Less Than 2^-28 Relative Error.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT28PD</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m64bcst {sae}</args>
			<opc openc="FV">EVEX.512.66.0F38.W1 CC /r</opc>
			<cpuid>
				<flag>AVX512ER</flag>
			</cpuid>
			<dscrp>Computes approximations to the Reciprocal square root (&lt;2^-28 relative error) of the packed double-precision floating-point values from zmm2/m512/m64bcst and stores result in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VRSQRT28SD: single EVEX.NDS.LIG scalar form (W1, opcode byte CD) flagged AVX512ER. Three operands: xmm1 destination under writemask, xmm2 supplying the upper bits, xmm3/m64 source. Uses the T1S encoding defined below. -->
		<brief>VRSQRT28SD--Approximation to the Reciprocal Square Root of Scalar Double-Precision Floating-Point Value with Less Than 2^-28 Relative Error.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT28SD</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m64 {sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W1 CD /r</opc>
			<cpuid>
				<flag>AVX512ER</flag>
			</cpuid>
			<dscrp>Computes approximate reciprocal square root (&lt;2^-28 relative error) of the scalar double-precision floating-point value from xmm3/m64 and stores result in xmm1 with writemask k1. Also, upper double-precision floating-point value (bits[127:64]) from xmm2 is copied to xmm1[127:64].</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VRSQRT28PS: single EVEX.512 form (W0, opcode byte CC) flagged AVX512ER, with a zmm destination under writemask and a zmm/m512/m32bcst source. Uses the two-operand FV encoding defined below. -->
		<brief>VRSQRT28PS--Approximation to the Reciprocal Square Root of Packed Single-Precision Floating-Point Values with Less Than 2^-28 Relative Error.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT28PS</mnem>
			<args>zmm1 {k1}{z},zmm2/m512/m32bcst {sae}</args>
			<opc openc="FV">EVEX.512.66.0F38.W0 CC /r</opc>
			<cpuid>
				<flag>AVX512ER</flag>
			</cpuid>
			<dscrp>Computes approximations to the Reciprocal square root (&lt;2^-28 relative error) of the packed single-precision floating-point values from zmm2/m512/m32bcst and stores result in zmm1 with writemask k1.</dscrp>
		</ins>
		<oprndenc openc="FV">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VRSQRT28SS: single EVEX.NDS.LIG scalar form (W0, opcode byte CD) flagged AVX512ER. Three operands: xmm1 destination under writemask, xmm2 supplying the upper bits, xmm3/m32 source. Uses the T1S encoding defined below. -->
		<brief>VRSQRT28SS--Approximation to the Reciprocal Square Root of Scalar Single-Precision Floating-Point Value with Less Than 2^-28 Relative Error.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VRSQRT28SS</mnem>
			<args>xmm1 {k1}{z},xmm2,xmm3/m32 {sae}</args>
			<opc openc="T1S">EVEX.NDS.LIG.66.0F38.W0 CD /r</opc>
			<cpuid>
				<flag>AVX512ER</flag>
			</cpuid>
			<dscrp>Computes approximate reciprocal square root (&lt;2^-28 relative error) of the scalar single-precision floating-point value from xmm3/m32 and stores result in xmm1 with writemask k1. Also, upper 3 single-precision floating-point values (bits[127:32]) from xmm2 is copied to xmm1[127:32].</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>ModRM:reg(w)</oprnd1>
			<oprnd2>EVEX.vvvv(r)</oprnd2>
			<oprnd3>ModRM:r/m(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VGATHERPF0 group: four EVEX.512 prefetch forms flagged AVX512PF, each with a single VSIB memory operand under opmask k1. Opcode byte C6 is used with dword indices and C7 with qword indices; W0 selects single-precision data and W1 double-precision. All carry the /1 opcode extension and use the T1S encoding defined at the end of this group. -->
		<brief>VGATHERPF0DPS/VGATHERPF0QPS/VGATHERPF0DPD/VGATHERPF0QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T0 Hint.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERPF0DPS</mnem>
			<args>vm32z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 C6 /1 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T0 hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERPF0QPS</mnem>
			<args>vm64z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 C7 /1 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T0 hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERPF0DPD</mnem>
			<args>vm32y {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 C6 /1 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T0 hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERPF0QPD</mnem>
			<args>vm64z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 C7 /1 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T0 hint.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>BaseReg(R): VSIB:base,VectorReg(R): VSIB:index</oprnd1>
			<oprnd2>NA</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- VGATHERPF1 group: four EVEX.512 prefetch forms flagged AVX512PF, each with a single VSIB memory operand under opmask k1. Opcode byte C6 is used with dword indices and C7 with qword indices; W0 selects single-precision data and W1 double-precision. All carry the /2 opcode extension and use the T1S encoding defined at the end of this group. -->
		<brief>VGATHERPF1DPS/VGATHERPF1QPS/VGATHERPF1DPD/VGATHERPF1QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T1 Hint.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERPF1DPS</mnem>
			<args>vm32z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 C6 /2 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T1 hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERPF1QPS</mnem>
			<args>vm64z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 C7 /2 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using opmask k1 and T1 hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERPF1DPD</mnem>
			<args>vm32y {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 C6 /2 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T1 hint.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VGATHERPF1QPD</mnem>
			<args>vm64z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 C7 /2 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using opmask k1 and T1 hint.</dscrp>
		</ins>
		<oprndenc openc="T1S">
			<oprnd1>BaseReg(R): VSIB:base,VectorReg(R): VSIB:index</oprnd1>
			<oprnd2>NA</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Scatter-prefetch group with the T0 hint and intent to write: four forms, all gated by AVX512PF, all sharing EVEX.512.66.0F38 and the /5 digit. -->
		<!-- W0 vs W1 separates the PS and PD mnemonics; opcode C6 vs C7 separates the dword-index (D) and qword-index (Q) mnemonics. -->
		<brief>VSCATTERPF0DPS/VSCATTERPF0QPS/VSCATTERPF0DPD/VSCATTERPF0QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T0 Hint with Intent to Write.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERPF0DPS</mnem>
			<args>vm32z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 C6 /5 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T0 hint with intent to write.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERPF0QPS</mnem>
			<args>vm64z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 C7 /5 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T0 hint with intent to write.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERPF0DPD</mnem>
			<args>vm32y {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 C6 /5 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T0 hint with intent to write.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERPF0QPD</mnem>
			<args>vm64z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 C7 /5 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T0 hint with intent to write.</dscrp>
		</ins>
		<!-- Single operand-encoding table shared by all four forms above (key T1S): one VSIB memory operand, remaining slots unused. -->
		<oprndenc openc="T1S">
			<oprnd1>BaseReg(R): VSIB:base,VectorReg(R): VSIB:index</oprnd1>
			<oprnd2>NA</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Scatter-prefetch group with the T1 hint and intent to write: four forms, all gated by AVX512PF, all sharing EVEX.512.66.0F38 and the /6 digit. -->
		<!-- W0 vs W1 separates the PS and PD mnemonics; opcode C6 vs C7 separates the dword-index (D) and qword-index (Q) mnemonics. -->
		<brief>VSCATTERPF1DPS/VSCATTERPF1QPS/VSCATTERPF1DPD/VSCATTERPF1QPD--Sparse Prefetch Packed SP/DP Data Values with Signed Dword, Signed Qword Indices Using T1 Hint with Intent to Write.</brief>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERPF1DPS</mnem>
			<args>vm32z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 C6 /6 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed dword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T1 hint with intent to write.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERPF1QPS</mnem>
			<args>vm64z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W0 C7 /6 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed qword indices, prefetch sparse byte memory locations containing single-precision data using writemask k1 and T1 hint with intent to write.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERPF1DPD</mnem>
			<args>vm32y {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 C6 /6 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed dword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T1 hint with intent to write.</dscrp>
		</ins>
		<ins x32m="V" x64m="V">
			<mnem>VSCATTERPF1QPD</mnem>
			<args>vm64z {k1}</args>
			<opc openc="T1S">EVEX.512.66.0F38.W1 C7 /6 /vsib</opc>
			<cpuid>
				<flag>AVX512PF</flag>
			</cpuid>
			<dscrp>Using signed qword indices, prefetch sparse byte memory locations containing double-precision data using writemask k1 and T1 hint with intent to write.</dscrp>
		</ins>
		<!-- Single operand-encoding table shared by all four forms above (key T1S): one VSIB memory operand, remaining slots unused. -->
		<oprndenc openc="T1S">
			<oprnd1>BaseReg(R): VSIB:base,VectorReg(R): VSIB:index</oprnd1>
			<oprnd2>NA</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Single form gated by the SHA flag; the opcode string carries no VEX/EVEX prefix and ends in ib, matching the Imm8 third operand in the RMI table below. -->
		<brief>SHA1RNDS4--Perform Four Rounds of SHA1 Operation.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SHA1RNDS4</mnem>
			<args>xmm1,xmm2/m128,imm8</args>
			<opc openc="RMI">0F 3A CC /r ib</opc>
			<cpuid>
				<flag>SHA</flag>
			</cpuid>
			<dscrp>Performs four rounds of SHA1 operation operating on SHA1 state (A,B,C,D) from xmm1, with a pre-computed sum of the next 4 round message dwords and state variable E from xmm2/m128. The immediate byte controls logic functions and round constants.</dscrp>
		</ins>
		<!-- Operand 1 is both source and destination (r,w); operand 2 is read only. -->
		<oprndenc openc="RMI">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Imm8</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Single two-operand form (0F 38 C8 /r) gated by the SHA flag; uses the RM operand table below. -->
		<brief>SHA1NEXTE--Calculate SHA1 State Variable E after Four Rounds.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SHA1NEXTE</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 38 C8 /r</opc>
			<cpuid>
				<flag>SHA</flag>
			</cpuid>
			<dscrp>Calculates SHA1 state variable E after four rounds of operation from the current SHA1 state variable A in xmm1. The calculated value of the SHA1 state variable E is added to the scheduled dwords in xmm2/m128, and stored with some of the scheduled dwords in xmm1.</dscrp>
		</ins>
		<!-- Operand 1 (ModRM:reg) is read and written; operand 2 (ModRM:r/m) is read only. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Single two-operand form (0F 38 C9 /r) gated by the SHA flag; uses the RM operand table below. -->
		<brief>SHA1MSG1--Perform an Intermediate Calculation for the Next Four SHA1 Message Dwords.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SHA1MSG1</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 38 C9 /r</opc>
			<cpuid>
				<flag>SHA</flag>
			</cpuid>
			<dscrp>Performs an intermediate calculation for the next four SHA1 message dwords using previous message dwords from xmm1 and xmm2/m128, storing the result in xmm1.</dscrp>
		</ins>
		<!-- Operand 1 (ModRM:reg) is read and written; operand 2 (ModRM:r/m) is read only. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Single two-operand form (0F 38 CA /r) gated by the SHA flag; uses the RM operand table below. -->
		<brief>SHA1MSG2--Perform a Final Calculation for the Next Four SHA1 Message Dwords.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SHA1MSG2</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 38 CA /r</opc>
			<cpuid>
				<flag>SHA</flag>
			</cpuid>
			<dscrp>Performs the final calculation for the next four SHA1 message dwords using intermediate results from xmm1 and the previous message dwords from xmm2/m128, storing the result in xmm1.</dscrp>
		</ins>
		<!-- Operand 1 (ModRM:reg) is read and written; operand 2 (ModRM:r/m) is read only. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Single form (0F 38 CB /r) gated by the SHA flag. The third operand is the implicit XMM0 register, written as an escaped angle-bracket token in args. -->
		<brief>SHA256RNDS2--Perform Two Rounds of SHA256 Operation.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SHA256RNDS2</mnem>
			<args>xmm1,xmm2/m128,&lt;XMM0&gt;</args>
			<opc openc="RM0">0F 38 CB /r</opc>
			<cpuid>
				<flag>SHA</flag>
			</cpuid>
			<dscrp>Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) from xmm1, an initial SHA256 state (A,B,E,F) from xmm2/m128, and a pre-computed sum of the next 2 round message dwords and the corresponding round constants from the implicit operand XMM0, storing the updated SHA256 state (A,B,E,F) result in xmm1.</dscrp>
		</ins>
		<!-- Keyed RM0 so that it matches the openc on the opc element above; slot 3 is implicit XMM0, not an immediate, and the opcode string has no ib byte. -->
		<oprndenc openc="RM0">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>Implicit XMM0(r)</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Single two-operand form (0F 38 CC /r) gated by the SHA flag; uses the RM operand table below. -->
		<brief>SHA256MSG1--Perform an Intermediate Calculation for the Next Four SHA256 Message Dwords.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SHA256MSG1</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 38 CC /r</opc>
			<cpuid>
				<flag>SHA</flag>
			</cpuid>
			<dscrp>Performs an intermediate calculation for the next four SHA256 message dwords using previous message dwords from xmm1 and xmm2/m128, storing the result in xmm1.</dscrp>
		</ins>
		<!-- Operand 1 (ModRM:reg) is read and written; operand 2 (ModRM:r/m) is read only. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Single two-operand form (0F 38 CD /r) gated by the SHA flag; uses the RM operand table below. -->
		<brief>SHA256MSG2--Perform a Final Calculation for the Next Four SHA256 Message Dwords.</brief>
		<ins x32m="V" x64m="V">
			<mnem>SHA256MSG2</mnem>
			<args>xmm1,xmm2/m128</args>
			<opc openc="RM">0F 38 CD /r</opc>
			<cpuid>
				<flag>SHA</flag>
			</cpuid>
			<dscrp>Performs the final calculation for the next four SHA256 message dwords using previous message dwords from xmm1 and xmm2/m128, storing the result in xmm1.</dscrp>
		</ins>
		<!-- Operand 1 (ModRM:reg) is read and written; operand 2 (ModRM:r/m) is read only. -->
		<oprndenc openc="RM">
			<oprnd1>ModRM:reg(r,w)</oprnd1>
			<oprnd2>ModRM:r/m(r)</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Single memory-operand form (0F 0D /2); the CPUID flag carries the same name as the mnemonic. -->
		<brief>PREFETCHWT1--Prefetch Vector Data Into Caches with Intent to Write and T1 Hint.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PREFETCHWT1</mnem>
			<args>m8</args>
			<opc openc="M">0F 0D /2</opc>
			<cpuid>
				<flag>PREFETCHWT1</flag>
			</cpuid>
			<dscrp>Move data from m8 closer to the processor using T1 hint with intent to write.</dscrp>
		</ins>
		<!-- The only operand is encoded in ModRM:r/m and is marked read only (r). -->
		<oprndenc openc="M">
			<oprnd1>ModRM:r/m(r)</oprnd1>
			<oprnd2>NA</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Same mnemonic, args, opcode and CPUID flag as the CLWB record that follows; only the brief differs. -->
		<!-- NOTE(review): presumably transcribed from an example page of the source manual; consumers that build an opcode table should expect this duplicate. -->
		<brief>CLWB--Cache Line Write Back (THIS IS AN EXAMPLE).</brief>
		<ins x32m="V" x64m="V">
			<mnem>CLWB</mnem>
			<args>m8</args>
			<opc openc="M">66 0F AE /6</opc>
			<cpuid>
				<flag>CLWB</flag>
			</cpuid>
			<dscrp>Writes back modified cache line containing m8, and may retain the line in cache hierarchy in non-modified state.</dscrp>
		</ins>
		<!-- The only operand is encoded in ModRM:r/m and is marked (w). This is the sole encoding table: key M is the only key referenced by an opc element in this record. -->
		<oprndenc openc="M">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>NA</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Single memory-operand form (66 0F AE /6) gated by the CLWB flag. A record with the same mnemonic and opcode, briefed as an example, immediately precedes this one. -->
		<brief>CLWB--Cache Line Write Back.</brief>
		<ins x32m="V" x64m="V">
			<mnem>CLWB</mnem>
			<args>m8</args>
			<opc openc="M">66 0F AE /6</opc>
			<cpuid>
				<flag>CLWB</flag>
			</cpuid>
			<dscrp>Writes back modified cache line containing m8, and may retain the line in cache hierarchy in non-modified state.</dscrp>
		</ins>
		<!-- The only operand is encoded in ModRM:r/m and is marked (w). -->
		<oprndenc openc="M">
			<oprnd1>ModRM:r/m(w)</oprnd1>
			<oprnd2>NA</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
	<common>
		<!-- Operand-less form: args holds the literal token void and the opcode is the fixed byte sequence 66 0F AE F8. -->
		<brief>PCOMMIT--Persistent Commit.</brief>
		<ins x32m="V" x64m="V">
			<mnem>PCOMMIT</mnem>
			<args>void</args>
			<opc openc="NP">66 0F AE F8</opc>
			<cpuid>
				<flag>PCOMMIT</flag>
			</cpuid>
			<dscrp>Commits stores to persistent memory.</dscrp>
		</ins>
		<!-- Key NP: every operand slot is NA. -->
		<oprndenc openc="NP">
			<oprnd1>NA</oprnd1>
			<oprnd2>NA</oprnd2>
			<oprnd3>NA</oprnd3>
			<oprnd4>NA</oprnd4>
		</oprndenc>
	</common>
</instrs>