// PERMUTE_ARGS:

// Copyright (c) 1999-2016 by The D Language Foundation
// All Rights Reserved
// written by Walter Bright
// https://www.digitalmars.com

import core.stdc.stdio;
import core.stdc.config;

// When D_PIC or D_PIE is set the tests are compiled out and main() just
// returns 0; the real tests are only built for D_InlineAsm_X86_64.
version (D_PIC)
{
    int main() { return 0; }
}
else version (D_PIE)
{
    int main() { return 0; }
}
else version (D_InlineAsm_X86_64)
{

// Plain aggregates of four and two ints; M128 is used as the 16-byte memory
// operand in the SSE tests further down.
struct M128 { int a,b,c,d; };
struct M64 { int a,b; };

/+
__gshared byte b;
__gshared short w;
__gshared int i;
__gshared long l;
+/

/****************************************************/

// Exercises the `align` directive (with a static const operand) and the
// `__LOCAL_SIZE` symbol: the value of __LOCAL_SIZE is stored into `foo`
// through an RBP-relative operand and must equal 16.
void test1()
{
    int foo;
    int bar;
    static const int x = 4;

    asm
    {
        align x; ;
        mov EAX, __LOCAL_SIZE ;
        mov foo[RBP], EAX ;
    }
    assert(foo == 16); // stack must be 16 byte aligned
}

/****************************************************/

// Exercises the `even` directive: after it, EAX is set to 0, incremented and
// stored into `foo`, which must therefore be 1.
void test2()
{
    int foo;
    int bar;

    asm
    {
        even ;
        mov EAX,0 ;
        inc EAX ;
        mov foo[RBP], EAX ;
    }
    assert(foo == 1);
}

/****************************************************/

// Exercises `$` in a jump target and the `dw` data directive.
// The two words emitted by `dw` are hand-encoded `inc EAX` / `dec EAX`.
// The assert expects 4 (= 5 - 1), i.e. `jmp $ + 2` must land on the second
// word so that only the `dec EAX` executes.
void test3()
{
    int foo;
    int bar;

    asm
    {
        mov EAX,5 ;
        jmp $ + 2 ;
        dw 0xC0FF,0xC8FF ; // inc EAX, dec EAX
        mov foo[RBP],EAX ;
    }
    assert(foo == 4);
}

/****************************************************/

// Exercises a forward jump to a label placed in front of a `dw` directive.
// EAX becomes 5 (non-zero), so `jne L1` is taken and the inc/dec pair is
// skipped; the word at L1 is the same 0xC8FF value annotated as `dec EAX`
// above, giving the expected result 4.
void test4()
{
    int foo;
    int bar;

    asm
    {
        xor EAX,EAX ;
        add EAX,5 ;
        jne L1 ;
        dw 0xC0FF,0xC8FF ; // inc EAX, dec EAX
L1:
        dw 0xC8FF ;
        mov foo[RBP],EAX ;
    }
    assert(foo == 4);
}

/****************************************************/

// Exercises the data directives db, ds, di, dl, df, dd and de.
// `call L1` pushes the address of the byte following the call, which is the
// start of the emitted data; `pop RBX` at L1 retrieves that address and it is
// stored in `p`. The data itself is never executed.
// The asserts then walk the data: 5 bytes from the two `db` lines, three
// ushort elements for `ds "def"`, three uint elements for `di "ghi"`, one
// ulong for `dl`, then a float, a double and a real that must compare equal
// to the statics fs, ds and es.
void test5()
{
    int foo;
    ubyte *p;
    ushort *w;
    uint *u;
    ulong *ul;
    float *f;
    double *d;
    real *e;

    static float fs = 1.1;
    static double ds = 1.2;
    static real es = 1.3;

    asm
    {
        call L1 ;
        db 0xFF,0xC0; ; // inc EAX
        db "abc" ;
        ds "def" ;
        di "ghi" ;
        dl 0x12345678ABCDEF;
        df 1.1 ;
        dd 1.2 ;
        de 1.3 ;
L1:
        pop RBX ;
        mov p[RBP],RBX ;
    }
    assert(p[0] == 0xFF);
    assert(p[1] == 0xC0);
    assert(p[2] == 'a');
    assert(p[3] == 'b');
    assert(p[4] == 'c');

    // each following pointer starts right after the elements just checked
    w = cast(ushort *)(p + 5);
    assert(w[0] == 'd');
    assert(w[1] == 'e');
    assert(w[2] == 'f');

    u = cast(uint *)(w + 3);
    assert(u[0] == 'g');
    assert(u[1] == 'h');
    assert(u[2] == 'i');

    ul = cast(ulong *)(u + 3);
    assert(ul[0] == 0x12345678ABCDEF);

    f = cast(float *)(ul + 1);
    assert(*f == fs);

    d = cast(double *)(f + 1);
    assert(*d == ds);

    e = cast(real *)(d + 1);
    assert(*e == es);
}

/****************************************************/

// Checks the encoding of base/index/scale memory operands, the short forms
// of `test` with an immediate, operands based on R11/R13, and RIP-relative
// operands.
// `call L1` pushes the address of the first instruction after the call;
// `pop RBX` at L1 fetches it into `p`. The instructions in between are never
// executed, only compared byte by byte against `data`.
void test6()
{
    ubyte *p;
    // expected machine code, one row per asm statement below
    static ubyte[] data =
    [
        0x8B, 0x01, // mov EAX,[RCX]
        0x8B, 0x04, 0x19, // mov EAX,[RBX][RCX]
        0x8B, 0x04, 0x4B, // mov EAX,[RCX*2][RBX]
        0x8B, 0x04, 0x5A, // mov EAX,[RBX*2][RDX]
        0x8B, 0x04, 0x8E, // mov EAX,[RCX*4][RSI]
        0x8B, 0x04, 0xF9, // mov EAX,[RDI*8][RCX]
        0x2B, 0x1C, 0x19, // sub EBX,[RBX][RCX]
        0x3B, 0x0C, 0x4B, // cmp ECX,[RCX*2][RBX]
        0x03, 0x14, 0x5A, // add EDX,[RBX*2][RDX]
        0x33, 0x34, 0x8E, // xor ESI,[RCX*4][RSI]
        0x29, 0x1C, 0x19, // sub [RBX][RCX],EBX
        0x39, 0x0C, 0x4B, // cmp [RCX*2][RBX],ECX
        0x01, 0x24, 0x5A, // add [RBX*2][RDX],ESP
        0x31, 0x2C, 0x8E, // xor [RCX*4][RSI],EBP
        0xA8, 0x03, // test AL,3
        0x66, 0xA9, 0x04, 0x00, // test AX,4
        0xA9, 0x05, 0x00, 0x00, 0x00, // test EAX,5
        0x85, 0x3C, 0xF9, // test [RDI*8][RCX],EDI
        0x49, 0x8B, 0x45, 0x00, // mov RAX,0[R13]
        0x4A, 0x8B, 0x04, 0x1D, 0x00, 0x00, 0x00, 0x00, // mov RAX,[00h][R11]
        0x49, 0x8B, 0x03, // mov RAX,[R11]
        0x8B, 0x05, 0x00, 0x00, 0x00, 0x00, // mov EAX,[RIP]
        0x8B, 0x05, 0x05, 0x00, 0x00, 0x00, // mov EAX,5[RIP]
    ];
    int i;

    asm
    {
        call L1 ;
        mov EAX,[RCX] ;
        mov EAX,[RCX][RBX] ;
        mov EAX,[RCX*2][RBX] ;
        mov EAX,[RDX][RBX*2] ;
        mov EAX,[RCX*4][RSI] ;
        mov EAX,[RCX][RDI*8] ;
        sub EBX,[RCX][RBX] ;
        cmp ECX,[RCX*2][RBX] ;
        add EDX,[RDX][RBX*2] ;
        xor ESI,[RCX*4][RSI] ;
        sub [RCX][RBX],EBX ;
        cmp [RCX*2][RBX],ECX ;
        add [RDX][RBX*2],ESP ;
        xor [RCX*4][RSI],EBP ;
        test AL,3 ;
        test AX,4 ;
        test EAX,5 ;
        test [RCX][RDI*8],EDI ;
        mov RAX,[R13] ;
        mov RAX,[0+1*R11] ;
        mov RAX,[R11] ;
        mov EAX,[RIP] ;
        mov EAX,5[RIP] ;
L1:
        pop RBX ;
        mov p[RBP],RBX ;
    }
    // compare the emitted code with the expected bytes
    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

/****************************************************/

/+
void test7()
{
    ubyte *p;
    static ubyte[] data =
    [
        0x26,0xA1,0x24,0x13,0x00,0x00, // mov EAX,ES:[01324h]
0x36,0x66,0xA1,0x78,0x56,0x00,0x00, // mov AX,SS:[05678h]
        0xA0,0x78,0x56,0x00,0x00, // mov AL,[05678h]
        0x2E,0x8A,0x25,0x78,0x56,0x00,0x00, // mov AH,CS:[05678h]
        0x64,0x8A,0x1D,0x78,0x56,0x00,0x00, // mov BL,FS:[05678h]
        0x65,0x8A,0x3D,0x78,0x56,0x00,0x00, // mov BH,GS:[05678h]
    ];
    int i;

    asm
    {
        call L1 ;
        mov EAX,ES:[0x1324] ;
        mov AX,SS:[0x5678] ;
        mov AL,DS:[0x5678] ;
        mov AH,CS:[0x5678] ;
        mov BL,FS:[0x5678] ;
        mov BH,GS:[0x5678] ;
L1: ;
        pop RBX ;
        mov p[RBP],RBX ;
    }
    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}
+/

/****************************************************/

// Checks the encoding of moves to and from segment, control and debug
// registers, followed by the movsb/movsw/movsd string instructions.
// `call L1` pushes the address of the first instruction after the call;
// `pop RBX` at L1 fetches it into `p`. The instructions in between are never
// executed, only compared byte by byte against `data`.
void test8()
{
    ubyte *p;
    // expected machine code, one row per asm statement below
    static ubyte[] data =
    [
        0x8C,0xD0, // mov AX,SS
        0x8C,0xDB, // mov BX,DS
        0x8C,0xC1, // mov CX,ES
        0x8C,0xCA, // mov DX,CS
        0x8C,0xE6, // mov SI,FS
        0x8C,0xEF, // mov DI,GS
        0x8E,0xD0, // mov SS,AX
        0x8E,0xDB, // mov DS,BX
        0x8E,0xC1, // mov ES,CX
        0x8E,0xCA, // mov CS,DX
        0x8E,0xE6, // mov FS,SI
        0x8E,0xEF, // mov GS,DI
        0x0F,0x22,0xC0, // mov CR0,EAX
        0x0F,0x22,0xD3, // mov CR2,EBX
        0x0F,0x22,0xD9, // mov CR3,ECX
        0x0F,0x22,0xE2, // mov CR4,EDX
        0x0F,0x20,0xC0, // mov EAX,CR0
        0x0F,0x20,0xD3, // mov EBX,CR2
        0x0F,0x20,0xD9, // mov ECX,CR3
        0x0F,0x20,0xE2, // mov EDX,CR4
        0x0F,0x23,0xC0, // mov DR0,EAX
        0x0F,0x23,0xCE, // mov DR1,ESI
        0x0F,0x23,0xD3, // mov DR2,EBX
        0x0F,0x23,0xD9, // mov DR3,ECX
        0x0F,0x23,0xE2, // mov DR4,EDX
        0x0F,0x23,0xEF, // mov DR5,EDI
        0x0F,0x23,0xF4, // mov DR6,ESP
        0x0F,0x23,0xFD, // mov DR7,EBP
        0x0F,0x21,0xC4, // mov ESP,DR0
        0x0F,0x21,0xCD, // mov EBP,DR1
        0x0F,0x21,0xD0, // mov EAX,DR2
        0x0F,0x21,0xDB, // mov EBX,DR3
        0x0F,0x21,0xE1, // mov ECX,DR4
        0x0F,0x21,0xEA, // mov EDX,DR5
        0x0F,0x21,0xF6, // mov ESI,DR6
        0x0F,0x21,0xFF, // mov EDI,DR7
        0xA4, // movsb
        0x66,0xA5, // movsw
        0xA5, // movsd
    ];
    int i;

    asm
    {
        call L1 ;
        mov AX,SS ;
        mov BX,DS ;
        mov CX,ES ;
        mov DX,CS ;
        mov SI,FS ;
        mov DI,GS ;
        mov SS,AX ;
        mov DS,BX ;
        mov ES,CX ;
        mov CS,DX ;
        mov FS,SI ;
        mov GS,DI ;
        mov CR0,EAX ;
        mov CR2,EBX ;
        mov CR3,ECX ;
        mov CR4,EDX ;
        mov EAX,CR0 ;
        mov EBX,CR2 ;
        mov ECX,CR3 ;
        mov EDX,CR4 ;
        mov DR0,EAX ;
        mov DR1,ESI ;
        mov DR2,EBX ;
        mov DR3,ECX ;
        mov DR4,EDX ;
        mov DR5,EDI ;
        mov DR6,ESP ;
        mov DR7,EBP ;
        mov ESP,DR0 ;
        mov EBP,DR1 ;
        mov EAX,DR2 ;
        mov EBX,DR3 ;
        mov ECX,DR4 ;
        mov EDX,DR5 ;
        mov ESI,DR6 ;
        mov EDI,DR7 ;
        movsb ;
        movsw ;
        movsd ;
L1: ;
        pop RBX ;
        mov p[RBP],RBX ;
    }
    // compare the emitted code with the expected bytes
    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

/****************************************************/

// Checks the encoding of memory operands written with 16-bit base/index
// registers (BX, BP, SI, DI), with no, small and large displacements.
// Uses the same call/pop technique as the other encoding tests to obtain a
// pointer to the emitted code, which is then compared against `data`.
// NOTE(review): `data` still contains a row for `mov AX,[DI]`, but that
// instruction is commented out in the asm block below, so table and code do
// not line up from that row on. The caller is not visible here; confirm
// whether this test is actually invoked.
void test9()
{
    ubyte *p;
    static ubyte[] data =
    [
        0x67,0x66,0x8B,0x00, // mov AX,[BX+SI]
        0x67,0x66,0x8B,0x01, // mov AX,[BX+DI]
        0x67,0x66,0x8B,0x02, // mov AX,[BP+SI]
        0x67,0x66,0x8B,0x03, // mov AX,[BP+DI]
        0x67,0x66,0x8B,0x04, // mov AX,[SI]
        0x67,0x66,0x8B,0x05, // mov AX,[DI]
        0x66,0xB8,0xD2,0x04, // mov AX,04D2h
        0x67,0x66,0x8B,0x07, // mov AX,[BX]
        0x67,0x66,0x8B,0x40,0x01, // mov AX,1[BX+SI]
        0x67,0x66,0x8B,0x41,0x02, // mov AX,2[BX+DI]
        0x67,0x66,0x8B,0x42,0x03, // mov AX,3[BP+SI]
        0x67,0x66,0x8B,0x43,0x04, // mov AX,4[BP+DI]
        0x67,0x66,0x8B,0x44,0x05, // mov AX,5[SI]
        0x67,0x66,0x8B,0x45,0x06, // mov AX,6[DI]
        0x67,0x66,0x8B,0x43,0x07, // mov AX,7[BP+DI]
        0x67,0x66,0x8B,0x47,0x08, // mov AX,8[BX]
        0x67,0x8B,0x80,0x21,0x01, // mov EAX,0121h[BX+SI]
        0x67,0x66,0x8B,0x81,0x22,0x01, // mov AX,0122h[BX+DI]
        0x67,0x66,0x8B,0x82,0x43,0x23, // mov AX,02343h[BP+SI]
        0x67,0x66,0x8B,0x83,0x54,0x45, // mov AX,04554h[BP+DI]
        0x67,0x66,0x8B,0x84,0x45,0x66, // mov AX,06645h[SI]
        0x67,0x66,0x8B,0x85,0x36,0x12, // mov AX,01236h[DI]
        0x67,0x66,0x8B,0x86,0x67,0x45, // mov AX,04567h[BP]
        0x67,0x8A,0x87,0x08,0x01, // mov AL,0108h[BX]
    ];
    int i;

    asm
    {
        call L1 ;
        mov AX,[BX+SI] ;
        mov AX,[BX+DI] ;
        mov AX,[BP+SI] ;
        mov AX,[BP+DI] ;
        mov AX,[SI] ;
        // mov AX,[DI] ; Internal error: backend/cod3.c 4652
        mov AX,[1234] ;
        mov AX,[BX] ;
        mov AX,1[BX+SI] ;
        mov AX,2[BX+DI] ;
        mov AX,3[BP+SI] ;
        mov AX,4[BP+DI] ;
        mov AX,5[SI] ;
        mov AX,6[DI] ;
        mov AX,7[DI+BP] ;
        mov AX,8[BX] ;
        mov EAX,0x121[BX+SI] ;
        mov AX,0x122[BX+DI] ;
        mov AX,0x2343[BP+SI] ;
        mov AX,0x4554[BP+DI] ;
        mov AX,0x6645[SI] ;
        mov AX,0x1236[DI] ;
        mov AX,0x4567[BP] ;
        mov AL,0x108[BX] ;
L1: ;
        pop RBX ;
        mov p[RBP],RBX ;
    }
    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

/****************************************************/

// Module-level shared variables written from inline asm in test10.
shared int bar10 = 78;
shared int[2] baz10;

// Checks that asm statements can store immediates into module-level
// variables: directly by name, through an explicit `int ptr` size override,
// and through a scaled index (RSI = 1, scale 4 selects baz10[1]).
// Unlike the encoding tests this code is executed and the stored values are
// asserted afterwards. `p`, `foo`, `data` and `i` are not used.
void test10()
{
    ubyte *p;
    int foo;
    static ubyte[] data =
    [
    ];
    int i;

    asm
    {
        mov bar10,0x12 ;
        // mov baz10,0x13 ;// does not compile, ( should it? )
        mov int ptr baz10,0x13 ;// but this does
        mov ESI,1 ;
        mov baz10[RSI*4],0x14 ;
    }
    assert(bar10 == 0x12);
    assert(baz10[0] == 0x13);
    assert(baz10[1] == 0x14);
}

/****************************************************/

// Three consecutive int fields; test11 relies on a (second field) and
// b (third field, expected at offset 8).
struct Foo11
{
    int c;
    int a;
    int b;
}

// Checks that .sizeof / .offsetof expressions on struct members, and
// arithmetic on them, are accepted as immediate operands in asm statements.
// The results are stored into locals and asserted. `p` is not used.
void test11()
{
    ubyte *p;
    int x1;
    int x2;
    int x3;
    int x4;

    asm
    {
        mov x1,Foo11.a.sizeof ;
        mov x2,Foo11.b.offsetof ;
        mov x3,Foo11.sizeof ;
        mov x4,Foo11.sizeof + 7 ;
    }
    assert(x1 == int.sizeof);
    assert(x2 == 8);
    assert(x3 == 12);
    assert(x4 == 19); // 12 + 7
}

/****************************************************/

// Checks the encoding of the two-operand integer ALU instructions
// adc/add/and/cmp/or/sbb/sub/test/xor in their accumulator-immediate,
// memory-immediate, memory-register and register-memory forms.
// Uses the call/pop technique: `call L1` pushes the address of the first
// instruction after the call, `pop RBX` at L1 fetches it into `p`, and the
// emitted bytes are compared against `data` without being executed.
// The expected bytes encode rm8 as -8[RBP] and rm32 as -4[RBP], so the table
// is tied to the order of the local declarations below.
void test12()
{
    ubyte *p;
    // expected machine code, one row per asm statement below
    static ubyte[] data =
    [
        0x14,0x05, // adc AL,5
        0x83,0xD0,0x14, // adc EAX,014h
        0x80,0x55,0xF8,0x17, // adc byte ptr -8[RBP],017h
        0x83,0x55,0xFC,0x17, // adc dword ptr -4[RBP],017h
        0x81,0x55,0xFC,0x34,0x12,0x00,0x00, // adc dword ptr -4[RBP],01234h
        0x10,0x7D,0xF8, // adc -8[RBP],BH
        0x11,0x5D,0xFC, // adc -4[RBP],EBX
        0x12,0x5D,0xF8, // adc BL,-8[RBP]
        0x13,0x55,0xFC, // adc EDX,-4[RBP]
        0x04,0x05, // add AL,5
        0x83,0xC0,0x14, // add EAX,014h
        0x80,0x45,0xF8,0x17, // add byte ptr -8[RBP],017h
        0x83,0x45,0xFC,0x17, // add dword ptr -4[RBP],017h
        0x81,0x45,0xFC,0x34,0x12,0x00,0x00, // add dword ptr -4[RBP],01234h
        0x00,0x7D,0xF8, // add -8[RBP],BH
        0x01,0x5D,0xFC, // add -4[RBP],EBX
        0x02,0x5D,0xF8, // add BL,-8[RBP]
        0x03,0x55,0xFC, // add EDX,-4[RBP]
        0x24,0x05, // and AL,5
        0x83,0xE0,0x14, // and EAX,014h
        0x80,0x65,0xF8,0x17, // and byte ptr -8[RBP],017h
        0x83,0x65,0xFC,0x17, // and dword ptr -4[RBP],017h
        0x81,0x65,0xFC,0x34,0x12,0x00,0x00, // and dword ptr -4[RBP],01234h
        0x20,0x7D,0xF8, // and -8[RBP],BH
        0x21,0x5D,0xFC, // and -4[RBP],EBX
        0x22,0x5D,0xF8, // and BL,-8[RBP]
        0x23,0x55,0xFC, // and EDX,-4[RBP]
        0x3C,0x05, // cmp AL,5
        0x83,0xF8,0x14, // cmp EAX,014h
        0x80,0x7D,0xF8,0x17, // cmp byte ptr -8[RBP],017h
        0x83,0x7D,0xFC,0x17, // cmp dword ptr -4[RBP],017h
        0x81,0x7D,0xFC,0x34,0x12,0x00,0x00, // cmp dword ptr -4[RBP],01234h
        0x38,0x7D,0xF8, // cmp -8[RBP],BH
        0x39,0x5D,0xFC, // cmp -4[RBP],EBX
        0x3A,0x5D,0xF8, // cmp BL,-8[RBP]
        0x3B,0x55,0xFC, // cmp EDX,-4[RBP]
        0x0C,0x05, // or AL,5
        0x83,0xC8,0x14, // or EAX,014h
        0x80,0x4D,0xF8,0x17, // or byte ptr -8[RBP],017h
        0x83,0x4D,0xFC,0x17, // or dword ptr -4[RBP],017h
        0x81,0x4D,0xFC,0x34,0x12,0x00,0x00, // or dword ptr -4[RBP],01234h
        0x08,0x7D,0xF8, // or -8[RBP],BH
        0x09,0x5D,0xFC, // or -4[RBP],EBX
        0x0A,0x5D,0xF8, // or BL,-8[RBP]
        0x0B,0x55,0xFC, // or EDX,-4[RBP]
        0x1C,0x05, // sbb AL,5
        0x83,0xD8,0x14, // sbb EAX,014h
        0x80,0x5D,0xF8,0x17, // sbb byte ptr -8[RBP],017h
        0x83,0x5D,0xFC,0x17, // sbb dword ptr -4[RBP],017h
        0x81,0x5D,0xFC,0x34,0x12,0x00,0x00, // sbb dword ptr -4[RBP],01234h
        0x18,0x7D,0xF8, // sbb -8[RBP],BH
        0x19,0x5D,0xFC, // sbb -4[RBP],EBX
        0x1A,0x5D,0xF8, // sbb BL,-8[RBP]
        0x1B,0x55,0xFC, // sbb EDX,-4[RBP]
        0x2C,0x05, // sub AL,5
        0x83,0xE8,0x14, // sub EAX,014h
        0x80,0x6D,0xF8,0x17, // sub byte ptr -8[RBP],017h
        0x83,0x6D,0xFC,0x17, // sub dword ptr -4[RBP],017h
        0x81,0x6D,0xFC,0x34,0x12,0x00,0x00, // sub dword ptr -4[RBP],01234h
        0x28,0x7D,0xF8, // sub -8[RBP],BH
        0x29,0x5D,0xFC, // sub -4[RBP],EBX
        0x2A,0x5D,0xF8, // sub BL,-8[RBP]
        0x2B,0x55,0xFC, // sub EDX,-4[RBP]
        0xA8,0x05, // test AL,5
        0xA9,0x14,0x00,0x00,0x00, // test EAX,014h
        0xF6,0x45,0xF8,0x17, // test byte ptr -8[RBP],017h
        0xF7,0x45,0xFC,0x17,0x00,0x00,0x00, // test dword ptr -4[RBP],017h
        0xF7,0x45,0xFC,0x34,0x12,0x00,0x00, // test dword ptr -4[RBP],01234h
        0x84,0x7D,0xF8, // test -8[RBP],BH
        0x85,0x5D,0xFC, // test -4[RBP],EBX
        0x34,0x05, // xor AL,5
        0x83,0xF0,0x14, // xor EAX,014h
        0x80,0x75,0xF8,0x17, // xor byte ptr -8[RBP],017h
        0x83,0x75,0xFC,0x17, // xor dword ptr -4[RBP],017h
        0x81,0x75,0xFC,0x34,0x12,0x00,0x00, // xor dword ptr -4[RBP],01234h
        0x30,0x7D,0xF8, // xor -8[RBP],BH
        0x31,0x5D,0xFC, // xor -4[RBP],EBX
        0x32,0x5D,0xF8, // xor BL,-8[RBP]
        0x33,0x55,0xFC, // xor EDX,-4[RBP]
    ];
    int i;
    // NOTE(review): `padding` is never referenced; presumably it only exists
    // to place rm8/rm32 at the -8/-4 offsets the table expects - confirm.
    int padding;
    byte rm8;
    int rm32;
    static int m32;

    asm
    {
        call L1 ;
        /*
        aaa ;
        aad ;
        aam ;
        aas ;
        arpl [SI],DI ;
        */
        adc AL,5 ;
        adc EAX,20 ;
        adc rm8[RBP],23 ;
        adc rm32[RBP],23 ;
        adc rm32[RBP],0x1234 ;
        adc rm8[RBP],BH ;
        adc rm32[RBP],EBX ;
        adc BL,rm8[RBP] ;
        adc EDX,rm32[RBP] ;
        add AL,5 ;
        add EAX,20 ;
        add rm8[RBP],23 ;
        add rm32[RBP],23 ;
        add rm32[RBP],0x1234 ;
        add rm8[RBP],BH ;
        add rm32[RBP],EBX ;
        add BL,rm8[RBP] ;
        add EDX,rm32[RBP] ;
        and AL,5 ;
        and EAX,20 ;
        and rm8[RBP],23 ;
        and rm32[RBP],23 ;
        and rm32[RBP],0x1234 ;
        and rm8[RBP],BH ;
        and rm32[RBP],EBX ;
        and BL,rm8[RBP] ;
        and EDX,rm32[RBP] ;
        cmp AL,5 ;
        cmp EAX,20 ;
        cmp rm8[RBP],23 ;
        cmp rm32[RBP],23 ;
        cmp rm32[RBP],0x1234 ;
        cmp rm8[RBP],BH ;
        cmp rm32[RBP],EBX ;
        cmp BL,rm8[RBP] ;
        cmp EDX,rm32[RBP] ;
        or AL,5 ;
        or EAX,20 ;
        or rm8[RBP],23 ;
        or rm32[RBP],23 ;
        or rm32[RBP],0x1234 ;
        or rm8[RBP],BH ;
        or rm32[RBP],EBX ;
        or BL,rm8[RBP] ;
        or EDX,rm32[RBP] ;
        sbb AL,5 ;
        sbb EAX,20 ;
        sbb rm8[RBP],23 ;
        sbb rm32[RBP],23 ;
        sbb rm32[RBP],0x1234 ;
        sbb rm8[RBP],BH ;
        sbb rm32[RBP],EBX ;
        sbb BL,rm8[RBP] ;
        sbb EDX,rm32[RBP] ;
        sub AL,5 ;
        sub EAX,20 ;
        sub rm8[RBP],23 ;
        sub rm32[RBP],23 ;
        sub rm32[RBP],0x1234 ;
        sub rm8[RBP],BH ;
        sub rm32[RBP],EBX ;
        sub BL,rm8[RBP] ;
        sub EDX,rm32[RBP] ;
        test AL,5 ;
        test EAX,20 ;
        test rm8[RBP],23 ;
        test rm32[RBP],23 ;
        test rm32[RBP],0x1234 ;
        test rm8[RBP],BH ;
        test rm32[RBP],EBX ;
        xor AL,5 ;
        xor EAX,20 ;
        xor rm8[RBP],23 ;
        xor rm32[RBP],23 ;
        xor rm32[RBP],0x1234 ;
        xor rm8[RBP],BH ;
        xor rm32[RBP],EBX ;
        xor BL,rm8[RBP] ;
        xor EDX,rm32[RBP] ;
L1: ;
        pop RBX ;
        mov p[RBP],RBX ;
    }
    // compare the emitted code with the expected bytes
    for (i = 0; i < data.length; i++)
    {
        //printf("p[%d] = x%02x, data = x%02x\n", i, p[i], data[i]);
        assert(p[i] == data[i]);
    }
}

/****************************************************/

// Encoding test for system, fence and SSE/SSE2 instructions; the locals
// m32, m64 and m128 serve as 4-, 8- and 16-byte RBP-relative memory operands.
void test13()
{
    int m32;
    long m64;
    M128 m128;
    ubyte *p;
static ubyte[] data =
    [
        // Expected machine code, one row per asm statement in the asm block
        // below. Rows with an 0xD0 / 0xD8 / 0xE0 displacement correspond to
        // the asm operands m32[RBP] / m64[RBP] / m128[RBP] respectively.
        0x0F,0x0B, // ud2
        0x0F,0x05, // syscall
        0x0F,0x34, // sysenter
        0x0F,0x35, // sysexit
        0x0F,0x07, // sysret
        0x0F,0xAE,0xE8, // lfence
        0x0F,0xAE,0xF0, // mfence
        0x0F,0xAE,0xF8, // sfence
        0x0F,0xAE,0x00, // fxsave [RAX]
        0x0F,0xAE,0x08, // fxrstor [RAX]
        0x0F,0xAE,0x10, // ldmxcsr [RAX]
        0x0F,0xAE,0x18, // stmxcsr [RAX]
        0x0F,0xAE,0x38, // clflush [RAX]

        0x0F,0x58,0x08, // addps XMM1,[RAX]
        0x0F,0x58,0xCA, // addps XMM1,XMM2
        0x66, 0x0F,0x58,0x03, // addpd XMM0,[RBX]
        0x66, 0x0F,0x58,0xD1, // addpd XMM2,XMM1
        0xF2,0x0F,0x58,0x08, // addsd XMM1,[RAX]
        0xF2,0x0F,0x58,0xCA, // addsd XMM1,XMM2
        0xF3,0x0F,0x58,0x2E, // addss XMM5,[RSI]
        0xF3,0x0F,0x58,0xF7, // addss XMM6,XMM7

        0x0F,0x54,0x08, // andps XMM1,[RAX]
        0x0F,0x54,0xCA, // andps XMM1,XMM2
        0x66, 0x0F,0x54,0x03, // andpd XMM0,[RBX]
        0x66, 0x0F,0x54,0xD1, // andpd XMM2,XMM1

        0x0F,0x55,0x08, // andnps XMM1,[RAX]
        0x0F,0x55,0xCA, // andnps XMM1,XMM2
        0x66, 0x0F,0x55,0x03, // andnpd XMM0,[RBX]
        0x66, 0x0F,0x55,0xD1, // andnpd XMM2,XMM1

        0xA7, // cmpsd
        0x0F,0xC2,0x08,0x01, // cmpps XMM1,[RAX],1
        0x0F,0xC2,0xCA,0x02, // cmpps XMM1,XMM2,2
        0x66, 0x0F,0xC2,0x03,0x03, // cmppd XMM0,[RBX],3
        0x66, 0x0F,0xC2,0xD1,0x04, // cmppd XMM2,XMM1,4
        0xF2,0x0F,0xC2,0x08,0x05, // cmpsd XMM1,[RAX],5
        0xF2,0x0F,0xC2,0xCA,0x06, // cmpsd XMM1,XMM2,6
        0xF3,0x0F,0xC2,0x2E,0x07, // cmpss XMM5,[RSI],7
        0xF3,0x0F,0xC2,0xF7,0x00, // cmpss XMM6,XMM7,0

        0x66, 0x0F,0x2F,0x08, // comisd XMM1,[RAX]
        0x66, 0x0F,0x2F,0x4D,0xD8, // comisd XMM1,-028h[RBP]
        0x66, 0x0F,0x2F,0xCA, // comisd XMM1,XMM2
        0x0F,0x2F,0x2E, // comiss XMM5,[RSI]
        0x0F,0x2F,0xF7, // comiss XMM6,XMM7

        0xF3,0x0F,0xE6,0xDC, // cvtdq2pd XMM3,XMM4
        0xF3,0x0F,0xE6,0x5D,0xD8, // cvtdq2pd XMM3,-028h[RBP]
        0x0F,0x5B,0xDC, // cvtdq2ps XMM3,XMM4
        0x0F,0x5B,0x5D,0xE0, // cvtdq2ps XMM3,-020h[RBP]
        0xF2,0x0F,0xE6,0xDC, // cvtpd2dq XMM3,XMM4
        0xF2,0x0F,0xE6,0x5D,0xE0, // cvtpd2dq XMM3,-020h[RBP]
        0x66, 0x0F,0x2D,0xDC, // cvtpd2pi MM3,XMM4
        0x66, 0x0F,0x2D,0x5D,0xE0, // cvtpd2pi MM3,-020h[RBP]
        0x66, 0x0F,0x5A,0xDC, // cvtpd2ps XMM3,XMM4
        0x66, 0x0F,0x5A,0x5D,0xE0, // cvtpd2ps XMM3,-020h[RBP]
        0x66, 0x0F,0x2A,0xDC, // cvtpi2pd XMM3,MM4
        0x66, 0x0F,0x2A,0x5D,0xD8, // cvtpi2pd XMM3,-028h[RBP]
        0x0F,0x2A,0xDC, // cvtpi2ps XMM3,MM4
        0x0F,0x2A,0x5D,0xD8, // cvtpi2ps XMM3,-028h[RBP]
        0x66, 0x0F,0x5B,0xDC, // cvtps2dq XMM3,XMM4
        0x66, 0x0F,0x5B,0x5D,0xE0, // cvtps2dq XMM3,-020h[RBP]
        0x0F,0x5A,0xDC, // cvtps2pd XMM3,XMM4
        0x0F,0x5A,0x5D,0xD8, // cvtps2pd XMM3,-028h[RBP]
        0x0F,0x2D,0xDC, // cvtps2pi MM3,XMM4
        0x0F,0x2D,0x5D,0xD8, // cvtps2pi MM3,-028h[RBP]
        0xF2,0x0F,0x2D,0xCC, // cvtsd2si ECX,XMM4
        0xF2,0x0F,0x2D,0x55,0xD8, // cvtsd2si EDX,-028h[RBP]
        0xF2,0x0F,0x5A,0xDC, // cvtsd2ss XMM3,XMM4
        0xF2,0x0F,0x5A,0x5D,0xD8, // cvtsd2ss XMM3,-028h[RBP]
        0xF2,0x0F,0x2A,0xDA, // cvtsi2sd XMM3,EDX
        0xF2,0x0F,0x2A,0x5D,0xD0, // cvtsi2sd XMM3,-030h[RBP]
        0xF3,0x0F,0x2A,0xDA, // cvtsi2ss XMM3,EDX
        0xF3,0x0F,0x2A,0x5D,0xD0, // cvtsi2ss XMM3,-030h[RBP]
        0xF3,0x0F,0x5A,0xDC, // cvtss2sd XMM3,XMM4
        0xF3,0x0F,0x5A,0x5D,0xD0, // cvtss2sd XMM3,-030h[RBP]
        0xF3,0x0F,0x2D,0xFC, // cvtss2si EDI,XMM4
        0xF3,0x0F,0x2D,0x7D,0xD0, // cvtss2si EDI,-030h[RBP]
        0x66, 0x0F,0x2C,0xDC, // cvttpd2pi MM3,XMM4
        0x66, 0x0F,0x2C,0x7D,0xE0, // cvttpd2pi MM7,-020h[RBP]
        0x66, 0x0F,0xE6,0xDC, // cvttpd2dq XMM3,XMM4
        0x66, 0x0F,0xE6,0x7D,0xE0, // cvttpd2dq XMM7,-020h[RBP]
        0xF3,0x0F,0x5B,0xDC, // cvttps2dq XMM3,XMM4
        0xF3,0x0F,0x5B,0x7D,0xE0, // cvttps2dq XMM7,-020h[RBP]
        0x0F,0x2C,0xDC, // cvttps2pi MM3,XMM4
        0x0F,0x2C,0x7D,0xD8, // cvttps2pi MM7,-028h[RBP]
        0xF2,0x0F,0x2C,0xC4, // cvttsd2si EAX,XMM4
        0xF2,0x0F,0x2C,0x4D,0xD8, // cvttsd2si ECX,-028h[RBP]
        0xF3,0x0F,0x2C,0xC4, // cvttss2si EAX,XMM4
        0xF3,0x0F,0x2C,0x4D,0xD0, // cvttss2si ECX,-030h[RBP]

        0x66, 0x0F,0x5E,0xE8, // divpd XMM5,XMM0
        0x66, 0x0F,0x5E,0x6D,0xE0, // divpd XMM5,-020h[RBP]
        0x0F,0x5E,0xE8, // divps XMM5,XMM0
        0x0F,0x5E,0x6D,0xE0, // divps XMM5,-020h[RBP]
        0xF2,0x0F,0x5E,0xE8, // divsd XMM5,XMM0
        0xF2,0x0F,0x5E,0x6D,0xD8, // divsd XMM5,-028h[RBP]
        0xF3,0x0F,0x5E,0xE8, // divss XMM5,XMM0
        0xF3,0x0F,0x5E,0x6D,0xD0, // divss XMM5,-030h[RBP]

        // NOTE(review): for the next two rows the asm block below spells the
        // operands in the opposite order (XMM1,XMM2 and MM3,MM4) - confirm
        // that this is the assembler's intended operand convention.
        0x66, 0x0F,0xF7,0xD1, // maskmovdqu XMM2,XMM1
        0x0F,0xF7,0xE3, // maskmovq MM4,MM3

        0x66, 0x0F,0x5F,0xC0, // maxpd XMM0,XMM0
        0x66, 0x0F,0x5F,0x4D,0xE0, // maxpd XMM1,-020h[RBP]
        0x0F,0x5F,0xD1, // maxps XMM2,XMM1
        0x0F,0x5F,0x5D,0xE0, // maxps XMM3,-020h[RBP]
        0xF2,0x0F,0x5F,0xE2, // maxsd XMM4,XMM2
        0xF2,0x0F,0x5F,0x6D,0xD8, // maxsd XMM5,-028h[RBP]
        0xF3,0x0F,0x5F,0xF3, // maxss XMM6,XMM3
        0xF3,0x0F,0x5F,0x7D,0xD0, // maxss XMM7,-030h[RBP]

        0x66, 0x0F,0x5D,0xC0, // minpd XMM0,XMM0
        0x66, 0x0F,0x5D,0x4D,0xE0, // minpd XMM1,-020h[RBP]
        0x0F,0x5D,0xD1, // minps XMM2,XMM1
        0x0F,0x5D,0x5D,0xE0, // minps XMM3,-020h[RBP]
        0xF2,0x0F,0x5D,0xE2, // minsd XMM4,XMM2
        0xF2,0x0F,0x5D,0x6D,0xD8, // minsd XMM5,-028h[RBP]
        0xF3,0x0F,0x5D,0xF3, // minss XMM6,XMM3
        0xF3,0x0F,0x5D,0x7D,0xD0, // minss XMM7,-030h[RBP]

        0x66, 0x0F,0x28,0xCA, // movapd XMM1,XMM2
        0x66, 0x0F,0x28,0x5D,0xE0, // movapd XMM3,-020h[RBP]
        0x66, 0x0F,0x29,0x65,0xE0, // movapd -020h[RBP],XMM4
        0x0F,0x28,0xCA, // movaps XMM1,XMM2
        0x0F,0x28,0x5D,0xE0, // movaps XMM3,-020h[RBP]
        0x0F,0x29,0x65,0xE0, // movaps -020h[RBP],XMM4

        0x0F,0x6E,0xCB, // movd MM1,EBX
        0x0F,0x6E,0x55,0xD0, // movd MM2,-030h[RBP]
        0x0F,0x7E,0xDB, // movd EBX,MM3
        0x0F,0x7E,0x65,0xD0, // movd -030h[RBP],MM4
        0x66, 0x0F,0x6E,0xCB, // movd XMM1,EBX
        0x66, 0x0F,0x6E,0x55,0xD0, // movd XMM2,-030h[RBP]
        0x66, 0x0F,0x7E,0xDB, // movd EBX,XMM3
        0x66, 0x0F,0x7E,0x65,0xD0, // movd -030h[RBP],XMM4

        0x66, 0x0F,0x6F,0xCA, // movdqa XMM1,XMM2
        0x66, 0x0F,0x6F,0x55,0xE0, // movdqa XMM2,-020h[RBP]
        0x66, 0x0F,0x7F,0x65,0xE0, // movdqa -020h[RBP],XMM4
        0xF3,0x0F,0x6F,0xCA, // movdqu XMM1,XMM2
        0xF3,0x0F,0x6F,0x55,0xE0, // movdqu XMM2,-020h[RBP]
        0xF3,0x0F,0x7F,0x65,0xE0, // movdqu -020h[RBP],XMM4

        0xF2,0x0F,0xD6,0xDC, // movdq2q MM3,XMM4
        0x0F,0x12,0xDC, // movhlps XMM3,XMM4
        0x66, 0x0F,0x16,0x55,0xD8, // movhpd XMM2,-028h[RBP]
        0x66, 0x0F,0x17,0x7D,0xD8, // movhpd -028h[RBP],XMM7
        0x0F,0x16,0x55,0xD8, // movhps XMM2,-028h[RBP]
        0x0F,0x17,0x7D,0xD8, // movhps -028h[RBP],XMM7
        0x0F,0x16,0xDC, // movlhps XMM3,XMM4
        0x66, 0x0F,0x12,0x55,0xD8, // movlpd XMM2,-028h[RBP]
        0x66, 0x0F,0x13,0x7D,0xD8, // movlpd -028h[RBP],XMM7
        0x0F,0x12,0x55,0xD8, // movlps XMM2,-028h[RBP]
        0x0F,0x13,0x7D,0xD8, // movlps -028h[RBP],XMM7

        0x66, 0x0F,0x50,0xF3, // movmskpd ESI,XMM3
        0x0F,0x50,0xF3, // movmskps ESI,XMM3

        0x66, 0x0F,0x59,0xC0, // mulpd XMM0,XMM0
        0x66, 0x0F,0x59,0x4D,0xE0, // mulpd XMM1,-020h[RBP]
        0x0F,0x59,0xD1, // mulps XMM2,XMM1
        0x0F,0x59,0x5D,0xE0, // mulps XMM3,-020h[RBP]
        0xF2,0x0F,0x59,0xE2, // mulsd XMM4,XMM2
        0xF2,0x0F,0x59,0x6D,0xD8, // mulsd XMM5,-028h[RBP]
        0xF3,0x0F,0x59,0xF3, // mulss XMM6,XMM3
        0xF3,0x0F,0x59,0x7D,0xD0, // mulss XMM7,-030h[RBP]

        0x66, 0x0F,0x51,0xC4, // sqrtpd XMM0,XMM4
        0x66, 0x0F,0x51,0x4D,0xE0, // sqrtpd XMM1,-020h[RBP]
        0x0F,0x51,0xD5, // sqrtps XMM2,XMM5
        0x0F,0x51,0x5D,0xE0, // sqrtps XMM3,-020h[RBP]
        0xF2,0x0F,0x51,0xE6, // sqrtsd XMM4,XMM6
        0xF2,0x0F,0x51,0x6D,0xD8, // sqrtsd XMM5,-028h[RBP]
        0xF3,0x0F,0x51,0xF7, // sqrtss XMM6,XMM7
        0xF3,0x0F,0x51,0x7D,0xD0, // sqrtss XMM7,-030h[RBP]

        0x66, 0x0F,0x5C,0xC4, // subpd XMM0,XMM4
        0x66, 0x0F,0x5C,0x4D,0xE0, // subpd XMM1,-020h[RBP]
        0x0F,0x5C,0xD5, // subps XMM2,XMM5
        0x0F,0x5C,0x5D,0xE0, // subps XMM3,-020h[RBP]
        0xF2,0x0F,0x5C,0xE6, // subsd XMM4,XMM6
        0xF2,0x0F,0x5C,0x6D,0xD8, // subsd XMM5,-028h[RBP]
        0xF3,0x0F,0x5C,0xF7, // subss XMM6,XMM7
        0xF3,0x0F,0x5C,0x7D,0xD0, // subss XMM7,-030h[RBP]

        0x0F,0x01,0xE0, // smsw EAX
    ];
    int i;

    // `call L1` pushes the address of the first instruction after the call;
    // `pop RBX` at L1 fetches it and it is stored in `p`. The instructions in
    // between are never executed, only compared against `data`.
    asm
    {
        call L1 ;
        ud2 ;
        syscall ;
        sysenter ;
        sysexit ;
        sysret ;
        lfence ;
        mfence ;
        sfence ;
        fxsave [RAX] ;
        fxrstor [RAX] ;
        ldmxcsr [RAX] ;
        stmxcsr [RAX] ;
        clflush [RAX] ;

        addps XMM1,[RAX] ;
        addps XMM1,XMM2 ;
        addpd XMM0,[RBX] ;
        addpd XMM2,XMM1 ;
        addsd XMM1,[RAX] ;
        addsd XMM1,XMM2 ;
        addss XMM5,[RSI] ;
        addss XMM6,XMM7 ;

        andps XMM1,[RAX] ;
        andps XMM1,XMM2 ;
        andpd XMM0,[RBX] ;
        andpd XMM2,XMM1 ;

        andnps XMM1,[RAX] ;
        andnps XMM1,XMM2 ;
        andnpd XMM0,[RBX] ;
        andnpd XMM2,XMM1 ;

        cmpsd ;
        cmpps XMM1,[RAX],1 ;
        cmpps XMM1,XMM2,2 ;
        cmppd XMM0,[RBX],3 ;
        cmppd XMM2,XMM1,4 ;
        cmpsd XMM1,[RAX],5 ;
        cmpsd XMM1,XMM2,6 ;
        cmpss XMM5,[RSI],7 ;
        cmpss XMM6,XMM7,0 ;

        comisd XMM1,[RAX] ;
        comisd XMM1,m64[RBP] ;
        comisd XMM1,XMM2 ;
        comiss XMM5,[RSI] ;
        comiss XMM6,XMM7 ;

        cvtdq2pd XMM3,XMM4 ;
        cvtdq2pd XMM3,m64[RBP] ;
        cvtdq2ps XMM3,XMM4 ;
        cvtdq2ps XMM3,m128[RBP] ;
        cvtpd2dq XMM3,XMM4 ;
        cvtpd2dq XMM3,m128[RBP] ;
        cvtpd2pi MM3,XMM4 ;
        cvtpd2pi MM3,m128[RBP] ;
        cvtpd2ps XMM3,XMM4 ;
        cvtpd2ps XMM3,m128[RBP] ;
        cvtpi2pd XMM3,MM4 ;
        cvtpi2pd XMM3,m64[RBP] ;
        cvtpi2ps XMM3,MM4 ;
        cvtpi2ps XMM3,m64[RBP] ;
        cvtps2dq XMM3,XMM4 ;
        cvtps2dq XMM3,m128[RBP] ;
        cvtps2pd XMM3,XMM4 ;
        cvtps2pd XMM3,m64[RBP] ;
        cvtps2pi MM3,XMM4 ;
        cvtps2pi MM3,m64[RBP] ;
        cvtsd2si ECX,XMM4 ;
        cvtsd2si EDX,m64[RBP] ;
        cvtsd2ss XMM3,XMM4 ;
        cvtsd2ss XMM3,m64[RBP] ;
        cvtsi2sd XMM3,EDX ;
        cvtsi2sd XMM3,m32[RBP] ;
        cvtsi2ss XMM3,EDX ;
        cvtsi2ss XMM3,m32[RBP] ;
        cvtss2sd XMM3,XMM4 ;
        cvtss2sd XMM3,m32[RBP] ;
        cvtss2si EDI,XMM4 ;
        cvtss2si EDI,m32[RBP] ;
        cvttpd2pi MM3,XMM4 ;
        cvttpd2pi MM7,m128[RBP] ;
        cvttpd2dq XMM3,XMM4 ;
        cvttpd2dq XMM7,m128[RBP] ;
        cvttps2dq XMM3,XMM4 ;
        cvttps2dq XMM7,m128[RBP] ;
        cvttps2pi MM3,XMM4 ;
        cvttps2pi MM7,m64[RBP] ;
        cvttsd2si EAX,XMM4 ;
        cvttsd2si ECX,m64[RBP] ;
        cvttss2si EAX,XMM4 ;
        cvttss2si ECX,m32[RBP] ;

        divpd XMM5,XMM0 ;
        divpd XMM5,m128[RBP] ;
        divps XMM5,XMM0 ;
        divps XMM5,m128[RBP] ;
        divsd XMM5,XMM0 ;
        divsd XMM5,m64[RBP] ;
        divss XMM5,XMM0 ;
        divss XMM5,m32[RBP] ;

        maskmovdqu XMM1,XMM2 ;
        maskmovq MM3,MM4 ;

        maxpd XMM0,XMM0 ;
        maxpd XMM1,m128[RBP] ;
        maxps XMM2,XMM1 ;
        maxps XMM3,m128[RBP] ;
        maxsd XMM4,XMM2 ;
        maxsd XMM5,m64[RBP] ;
        maxss XMM6,XMM3 ;
        maxss XMM7,m32[RBP] ;

        minpd XMM0,XMM0 ;
        minpd XMM1,m128[RBP] ;
        minps XMM2,XMM1 ;
        minps XMM3,m128[RBP] ;
        minsd XMM4,XMM2 ;
        minsd XMM5,m64[RBP] ;
        minss XMM6,XMM3 ;
        minss XMM7,m32[RBP] ;

        movapd XMM1,XMM2 ;
        movapd XMM3,m128[RBP] ;
        movapd m128[RBP],XMM4 ;
        movaps XMM1,XMM2 ;
        movaps XMM3,m128[RBP] ;
        movaps m128[RBP],XMM4 ;

        movd MM1,EBX ;
        movd MM2,m32[RBP] ;
        movd EBX,MM3 ;
        movd m32[RBP],MM4 ;
        movd XMM1,EBX ;
        movd XMM2,m32[RBP] ;
        movd EBX,XMM3 ;
        movd m32[RBP],XMM4 ;

        movdqa XMM1,XMM2 ;
        movdqa XMM2,m128[RBP] ;
        movdqa m128[RBP],XMM4 ;
        movdqu XMM1,XMM2 ;
        movdqu XMM2,m128[RBP] ;
        movdqu m128[RBP],XMM4 ;

        movdq2q MM3,XMM4 ;
        movhlps XMM3,XMM4 ;
        movhpd XMM2,m64[RBP] ;
        movhpd m64[RBP],XMM7 ;
        movhps XMM2,m64[RBP] ;
        movhps m64[RBP],XMM7 ;
        movlhps XMM3,XMM4 ;
        movlpd XMM2,m64[RBP] ;
        movlpd m64[RBP],XMM7 ;
        movlps XMM2,m64[RBP] ;
        movlps m64[RBP],XMM7 ;

        movmskpd ESI,XMM3 ;
        movmskps ESI,XMM3 ;

        mulpd XMM0,XMM0 ;
        mulpd XMM1,m128[RBP] ;
        mulps XMM2,XMM1 ;
        mulps XMM3,m128[RBP] ;
        mulsd XMM4,XMM2 ;
        mulsd XMM5,m64[RBP] ;
        mulss XMM6,XMM3 ;
        mulss XMM7,m32[RBP] ;

        sqrtpd XMM0,XMM4 ;
        sqrtpd XMM1,m128[RBP] ;
        sqrtps XMM2,XMM5 ;
        sqrtps XMM3,m128[RBP] ;
        sqrtsd XMM4,XMM6 ;
        sqrtsd XMM5,m64[RBP] ;
        sqrtss XMM6,XMM7 ;
        sqrtss XMM7,m32[RBP] ;

        subpd XMM0,XMM4 ;
        subpd XMM1,m128[RBP] ;
        subps XMM2,XMM5 ;
        subps XMM3,m128[RBP] ;
        subsd XMM4,XMM6 ;
        subsd XMM5,m64[RBP] ;
        subss XMM6,XMM7 ;
        subss XMM7,m32[RBP] ;

        smsw EAX ;
L1: ;
        pop RBX ;
        mov p[RBP],RBX ;
    }

    // compare the emitted code with the expected bytes
    for (i = 0; i < data.length; i++)
    {
        //printf("[%d] = %02x %02x\n", i, p[i], data[i]);
        assert(p[i] == data[i]);
    }
}

/****************************************************/

void test14()
{
    byte m8;
    short m16;
    int m32;
    long m64;
    M128 m128;
    ubyte *p;
    static ubyte[] data =
    [
        0x66, 0x0F,0x50,0xF3, // movmskpd ESI,XMM3
        0x0F,0x50,0xF3, // movmskps ESI,XMM3
        0x66, 0x0F,0xE7,0x55,0xE0, // movntdq -020h[RBP],XMM2
        0x0F,0xC3,0x4D,0xD4, // movnti -02Ch[RBP],ECX
        0x66, 0x0F,0x2B,0x5D,0xE0, // movntpd -020h[RBP],XMM3
        0x0F,0x2B,0x65,0xE0, // movntps -020h[RBP],XMM4
        0x0F,0xE7,0x6D,0xD8, // movntq -028h[RBP],MM5
        0x0F,0x6F,0xCA, // movq MM1,MM2
        0x0F,0x6F,0x55,0xD8, // movq MM2,-028h[RBP]
        0x0F,0x7F,0x5D,0xD8, // movq -028h[RBP],MM3
        0xF3,0x0F,0x7E,0xCA, // movq XMM1,XMM2
        0xF3,0x0F,0x7E,0x55,0xD8, // movq XMM2,-028h[RBP]
        0x66, 0x0F,0xD6,0x5D,0xD8, // movq -028h[RBP],XMM3
        0xF3,0x0F,0xD6,0xDA, // movq2dq XMM3,MM2
        0xA5, // movsd
        0xF2,0x0F,0x10,0xCA, // movsd XMM1,XMM2
        0xF2,0x0F,0x10,0x5D,0xD8, // movsd XMM3,-028h[RBP]
0xF2,0x0F,0x11,0x65,0xD8, // movsd -028h[RBP],XMM4 0xF3,0x0F,0x10,0xCA, // movss XMM1,XMM2 0xF3,0x0F,0x10,0x5D,0xD4, // movss XMM3,-02Ch[RBP] 0xF3,0x0F,0x11,0x65,0xD4, // movss -02Ch[RBP],XMM4 0x66, 0x0F,0x10,0xCA, // movupd XMM1,XMM2 0x66, 0x0F,0x10,0x5D,0xE0, // movupd XMM3,-020h[RBP] 0x66, 0x0F,0x11,0x65,0xE0, // movupd -020h[RBP],XMM4 0x0F,0x10,0xCA, // movups XMM1,XMM2 0x0F,0x10,0x5D,0xE0, // movups XMM3,-020h[RBP] 0x0F,0x11,0x65,0xE0, // movups -020h[RBP],XMM4 0x66, 0x0F,0x56,0xCA, // orpd XMM1,XMM2 0x66, 0x0F,0x56,0x5D,0xE0, // orpd XMM3,-020h[RBP] 0x0F,0x56,0xCA, // orps XMM1,XMM2 0x0F,0x56,0x5D,0xE0, // orps XMM3,-020h[RBP] 0x0F,0x63,0xCA, // packsswb MM1,MM2 0x0F,0x63,0x5D,0xD8, // packsswb MM3,-028h[RBP] 0x66, 0x0F,0x63,0xCA, // packsswb XMM1,XMM2 0x66, 0x0F,0x63,0x5D,0xE0, // packsswb XMM3,-020h[RBP] 0x0F,0x6B,0xCA, // packssdw MM1,MM2 0x0F,0x6B,0x5D,0xD8, // packssdw MM3,-028h[RBP] 0x66, 0x0F,0x6B,0xCA, // packssdw XMM1,XMM2 0x66, 0x0F,0x6B,0x5D,0xE0, // packssdw XMM3,-020h[RBP] 0x0F,0x67,0xCA, // packuswb MM1,MM2 0x0F,0x67,0x5D,0xD8, // packuswb MM3,-028h[RBP] 0x66, 0x0F,0x67,0xCA, // packuswb XMM1,XMM2 0x66, 0x0F,0x67,0x5D,0xE0, // packuswb XMM3,-020h[RBP] 0x0F,0xFC,0xCA, // paddb MM1,MM2 0x0F,0xFC,0x5D,0xD8, // paddb MM3,-028h[RBP] 0x66, 0x0F,0xFC,0xCA, // paddb XMM1,XMM2 0x66, 0x0F,0xFC,0x5D,0xE0, // paddb XMM3,-020h[RBP] 0x0F,0xFD,0xCA, // paddw MM1,MM2 0x0F,0xFD,0x5D,0xD8, // paddw MM3,-028h[RBP] 0x66, 0x0F,0xFD,0xCA, // paddw XMM1,XMM2 0x66, 0x0F,0xFD,0x5D,0xE0, // paddw XMM3,-020h[RBP] 0x0F,0xFE,0xCA, // paddd MM1,MM2 0x0F,0xFE,0x5D,0xD8, // paddd MM3,-028h[RBP] 0x66, 0x0F,0xFE,0xCA, // paddd XMM1,XMM2 0x66, 0x0F,0xFE,0x5D,0xE0, // paddd XMM3,-020h[RBP] 0x0F,0xD4,0xCA, // paddq MM1,MM2 0x0F,0xD4,0x5D,0xD8, // paddq MM3,-028h[RBP] 0x66, 0x0F,0xD4,0xCA, // paddq XMM1,XMM2 0x66, 0x0F,0xD4,0x5D,0xE0, // paddq XMM3,-020h[RBP] 0x0F,0xEC,0xCA, // paddsb MM1,MM2 0x0F,0xEC,0x5D,0xD8, // paddsb MM3,-028h[RBP] 0x66, 0x0F,0xEC,0xCA, // paddsb XMM1,XMM2 
0x66, 0x0F,0xEC,0x5D,0xE0, // paddsb XMM3,-020h[RBP] 0x0F,0xED,0xCA, // paddsw MM1,MM2 0x0F,0xED,0x5D,0xD8, // paddsw MM3,-028h[RBP] 0x66, 0x0F,0xED,0xCA, // paddsw XMM1,XMM2 0x66, 0x0F,0xED,0x5D,0xE0, // paddsw XMM3,-020h[RBP] 0x0F,0xDC,0xCA, // paddusb MM1,MM2 0x0F,0xDC,0x5D,0xD8, // paddusb MM3,-028h[RBP] 0x66, 0x0F,0xDC,0xCA, // paddusb XMM1,XMM2 0x66, 0x0F,0xDC,0x5D,0xE0, // paddusb XMM3,-020h[RBP] 0x0F,0xDD,0xCA, // paddusw MM1,MM2 0x0F,0xDD,0x5D,0xD8, // paddusw MM3,-028h[RBP] 0x66, 0x0F,0xDD,0xCA, // paddusw XMM1,XMM2 0x66, 0x0F,0xDD,0x5D,0xE0, // paddusw XMM3,-020h[RBP] 0x0F,0xDB,0xCA, // pand MM1,MM2 0x0F,0xDB,0x5D,0xD8, // pand MM3,-028h[RBP] 0x66, 0x0F,0xDB,0xCA, // pand XMM1,XMM2 0x66, 0x0F,0xDB,0x5D,0xE0, // pand XMM3,-020h[RBP] 0x0F,0xDF,0xCA, // pandn MM1,MM2 0x0F,0xDF,0x5D,0xD8, // pandn MM3,-028h[RBP] 0x66, 0x0F,0xDF,0xCA, // pandn XMM1,XMM2 0x66, 0x0F,0xDF,0x5D,0xE0, // pandn XMM3,-020h[RBP] 0x0F,0xE0,0xCA, // pavgb MM1,MM2 0x0F,0xE0,0x5D,0xD8, // pavgb MM3,-028h[RBP] 0x66, 0x0F,0xE0,0xCA, // pavgb XMM1,XMM2 0x66, 0x0F,0xE0,0x5D,0xE0, // pavgb XMM3,-020h[RBP] 0x0F,0xE3,0xCA, // pavgw MM1,MM2 0x0F,0xE3,0x5D,0xD8, // pavgw MM3,-028h[RBP] 0x66, 0x0F,0xE3,0xCA, // pavgw XMM1,XMM2 0x66, 0x0F,0xE3,0x5D,0xE0, // pavgw XMM3,-020h[RBP] 0x0F,0x74,0xCA, // pcmpeqb MM1,MM2 0x0F,0x74,0x5D,0xD8, // pcmpeqb MM3,-028h[RBP] 0x66, 0x0F,0x74,0xCA, // pcmpeqb XMM1,XMM2 0x66, 0x0F,0x74,0x5D,0xE0, // pcmpeqb XMM3,-020h[RBP] 0x0F,0x75,0xCA, // pcmpeqw MM1,MM2 0x0F,0x75,0x5D,0xD8, // pcmpeqw MM3,-028h[RBP] 0x66, 0x0F,0x75,0xCA, // pcmpeqw XMM1,XMM2 0x66, 0x0F,0x75,0x5D,0xE0, // pcmpeqw XMM3,-020h[RBP] 0x0F,0x76,0xCA, // pcmpeqd MM1,MM2 0x0F,0x76,0x5D,0xD8, // pcmpeqd MM3,-028h[RBP] 0x66, 0x0F,0x76,0xCA, // pcmpeqd XMM1,XMM2 0x66, 0x0F,0x76,0x5D,0xE0, // pcmpeqd XMM3,-020h[RBP] 0x0F,0x64,0xCA, // pcmpgtb MM1,MM2 0x0F,0x64,0x5D,0xD8, // pcmpgtb MM3,-028h[RBP] 0x66, 0x0F,0x64,0xCA, // pcmpgtb XMM1,XMM2 0x66, 0x0F,0x64,0x5D,0xE0, // pcmpgtb XMM3,-020h[RBP] 0x0F,0x65,0xCA, 
// pcmpgtw MM1,MM2 0x0F,0x65,0x5D,0xD8, // pcmpgtw MM3,-028h[RBP] 0x66, 0x0F,0x65,0xCA, // pcmpgtw XMM1,XMM2 0x66, 0x0F,0x65,0x5D,0xE0, // pcmpgtw XMM3,-020h[RBP] 0x0F,0x66,0xCA, // pcmpgtd MM1,MM2 0x0F,0x66,0x5D,0xD8, // pcmpgtd MM3,-028h[RBP] 0x66, 0x0F,0x66,0xCA, // pcmpgtd XMM1,XMM2 0x66, 0x0F,0x66,0x5D,0xE0, // pcmpgtd XMM3,-020h[RBP] 0x0F,0xC5,0xD6,0x07, // pextrw EDX,MM6,7 0x66, 0x0F,0xC5,0xD6,0x07, // pextrw EDX,XMM6,7 0x0F,0xC4,0xF2,0x07, // pinsrw MM6,EDX,7 0x0F,0xC4,0x75,0xD2,0x07, // pinsrw MM6,-02Eh[RBP],7 0x66, 0x0F,0xC4,0xF2,0x07, // pinsrw XMM6,EDX,7 0x66, 0x0F,0xC4,0x75,0xD2,0x07, // pinsrw XMM6,-02Eh[RBP],7 0x0F,0xF5,0xCA, // pmaddwd MM1,MM2 0x0F,0xF5,0x5D,0xD8, // pmaddwd MM3,-028h[RBP] 0x66, 0x0F,0xF5,0xCA, // pmaddwd XMM1,XMM2 0x66, 0x0F,0xF5,0x5D,0xE0, // pmaddwd XMM3,-020h[RBP] 0x0F,0xEE,0xCA, // pmaxsw MM1,XMM2 0x0F,0xEE,0x5D,0xD8, // pmaxsw MM3,-028h[RBP] 0x66, 0x0F,0xEE,0xCA, // pmaxsw XMM1,XMM2 0x66, 0x0F,0xEE,0x5D,0xE0, // pmaxsw XMM3,-020h[RBP] 0x0F,0xDE,0xCA, // pmaxub MM1,XMM2 0x0F,0xDE,0x5D,0xD8, // pmaxub MM3,-028h[RBP] 0x66, 0x0F,0xDE,0xCA, // pmaxub XMM1,XMM2 0x66, 0x0F,0xDE,0x5D,0xE0, // pmaxub XMM3,-020h[RBP] 0x0F,0xEA,0xCA, // pminsw MM1,MM2 0x0F,0xEA,0x5D,0xD8, // pminsw MM3,-028h[RBP] 0x66, 0x0F,0xEA,0xCA, // pminsw XMM1,XMM2 0x66, 0x0F,0xEA,0x5D,0xE0, // pminsw XMM3,-020h[RBP] 0x0F,0xDA,0xCA, // pminub MM1,MM2 0x0F,0xDA,0x5D,0xD8, // pminub MM3,-028h[RBP] 0x66, 0x0F,0xDA,0xCA, // pminub XMM1,XMM2 0x66, 0x0F,0xDA,0x5D,0xE0, // pminub XMM3,-020h[RBP] 0x0F,0xD7,0xC8, // pmovmskb ECX,MM0 0x66, 0x0F,0xD7,0xCE, // pmovmskb ECX,XMM6 0x0F,0xE4,0xCA, // pmulhuw MM1,MM2 0x0F,0xE4,0x5D,0xD8, // pmulhuw MM3,-028h[RBP] 0x66, 0x0F,0xE4,0xCA, // pmulhuw XMM1,XMM2 0x66, 0x0F,0xE4,0x5D,0xE0, // pmulhuw XMM3,-020h[RBP] 0x0F,0xE5,0xCA, // pmulhw MM1,MM2 0x0F,0xE5,0x5D,0xD8, // pmulhw MM3,-028h[RBP] 0x66, 0x0F,0xE5,0xCA, // pmulhw XMM1,XMM2 0x66, 0x0F,0xE5,0x5D,0xE0, // pmulhw XMM3,-020h[RBP] 0x0F,0xD5,0xCA, // pmullw MM1,MM2 
0x0F,0xD5,0x5D,0xD8, // pmullw MM3,-028h[RBP] 0x66, 0x0F,0xD5,0xCA, // pmullw XMM1,XMM2 0x66, 0x0F,0xD5,0x5D,0xE0, // pmullw XMM3,-020h[RBP] 0x0F,0xF4,0xCA, // pmuludq MM1,MM2 0x0F,0xF4,0x5D,0xD8, // pmuludq MM3,-028h[RBP] 0x66, 0x0F,0xF4,0xCA, // pmuludq XMM1,XMM2 0x66, 0x0F,0xF4,0x5D,0xE0, // pmuludq XMM3,-020h[RBP] 0x0F,0xEB,0xCA, // por MM1,MM2 0x0F,0xEB,0x5D,0xD8, // por MM3,-028h[RBP] 0x66, 0x0F,0xEB,0xCA, // por XMM1,XMM2 0x66, 0x0F,0xEB,0x5D,0xE0, // por XMM3,-020h[RBP] 0x0F,0x18,0x4D,0xD0, // prefetcht0 -030h[RBP] 0x0F,0x18,0x55,0xD0, // prefetcht1 -030h[RBP] 0x0F,0x18,0x5D,0xD0, // prefetcht2 -030h[RBP] 0x0F,0x18,0x45,0xD0, // prefetchnta -030h[RBP] 0x0F,0x0D,0x4D,0xD0, // prefetchw -030h[RBP] 0x0F,0x0D,0x55,0xD0, // prefetchwt1 -030h[RBP] 0x0F,0xF6,0xCA, // psadbw MM1,MM2 0x0F,0xF6,0x5D,0xD8, // psadbw MM3,-028h[RBP] 0x66, 0x0F,0xF6,0xCA, // psadbw XMM1,XMM2 0x66, 0x0F,0xF6,0x5D,0xE0, // psadbw XMM3,-020h[RBP] 0x66, 0x0F,0x70,0xCA,0x03, // pshufd XMM1,XMM2,3 0x66, 0x0F,0x70,0x5D,0xE0,0x03, // pshufd XMM3,-020h[RBP],3 0xF3,0x0F,0x70,0xCA,0x03, // pshufhw XMM1,XMM2,3 0xF3,0x0F,0x70,0x5D,0xE0,0x03, // pshufhw XMM3,-020h[RBP],3 0xF2,0x0F,0x70,0xCA,0x03, // pshuflw XMM1,XMM2,3 0xF2,0x0F,0x70,0x5D,0xE0,0x03, // pshuflw XMM3,-020h[RBP],3 0x0F,0x70,0xCA,0x03, // pshufw MM1,MM2,3 0x0F,0x70,0x5D,0xD8,0x03, // pshufw MM3,-028h[RBP],3 0x66, 0x0F,0x73,0xF9,0x18, // pslldq XMM1,020h 0x0F,0xF1,0xCA, // psllw MM1,MM2 0x0F,0xF1,0x4D,0xD8, // psllw MM1,-028h[RBP] 0x66, 0x0F,0xF1,0xCA, // psllw XMM1,XMM2 0x66, 0x0F,0xF1,0x4D,0xE0, // psllw XMM1,-020h[RBP] 0x0F,0x71,0xF1,0x15, // psraw MM1,015h 0x66, 0x0F,0x71,0xF1,0x15, // psraw XMM1,015h 0x0F,0xF2,0xCA, // pslld MM1,MM2 0x0F,0xF2,0x4D,0xD8, // pslld MM1,-028h[RBP] 0x66, 0x0F,0xF2,0xCA, // pslld XMM1,XMM2 0x66, 0x0F,0xF2,0x4D,0xE0, // pslld XMM1,-020h[RBP] 0x0F,0x72,0xF1,0x15, // psrad MM1,015h 0x66, 0x0F,0x72,0xF1,0x15, // psrad XMM1,015h 0x0F,0xF3,0xCA, // psllq MM1,MM2 0x0F,0xF3,0x4D,0xD8, // psllq MM1,-028h[RBP] 0x66, 
0x0F,0xF3,0xCA, // psllq XMM1,XMM2 0x66, 0x0F,0xF3,0x4D,0xE0, // psllq XMM1,-020h[RBP] 0x0F,0x73,0xF1,0x15, // psllq MM1,015h 0x66, 0x0F,0x73,0xF1,0x15, // psllq XMM1,015h 0x0F,0xE1,0xCA, // psraw MM1,MM2 0x0F,0xE1,0x4D,0xD8, // psraw MM1,-028h[RBP] 0x66, 0x0F,0xE1,0xCA, // psraw XMM1,XMM2 0x66, 0x0F,0xE1,0x4D,0xE0, // psraw XMM1,-020h[RBP] 0x0F,0x71,0xE1,0x15, // psraw MM1,015h 0x66, 0x0F,0x71,0xE1,0x15, // psraw XMM1,015h 0x0F,0xE2,0xCA, // psrad MM1,MM2 0x0F,0xE2,0x4D,0xD8, // psrad MM1,-028h[RBP] 0x66, 0x0F,0xE2,0xCA, // psrad XMM1,XMM2 0x66, 0x0F,0xE2,0x4D,0xE0, // psrad XMM1,-020h[RBP] 0x0F,0x72,0xE1,0x15, // psrad MM1,015h 0x66, 0x0F,0x72,0xE1,0x15, // psrad XMM1,015h 0x66, 0x0F,0x73,0xD9,0x18, // psrldq XMM1,020h 0x0F,0xD1,0xCA, // psrlw MM1,MM2 0x0F,0xD1,0x4D,0xD8, // psrlw MM1,-028h[RBP] 0x66, 0x0F,0xD1,0xCA, // psrlw XMM1,XMM2 0x66, 0x0F,0xD1,0x4D,0xE0, // psrlw XMM1,-020h[RBP] 0x0F,0x71,0xD1,0x15, // psrlw MM1,015h 0x66, 0x0F,0x71,0xD1,0x15, // psrlw XMM1,015h 0x0F,0xD2,0xCA, // psrld MM1,MM2 0x0F,0xD2,0x4D,0xD8, // psrld MM1,-028h[RBP] 0x66, 0x0F,0xD2,0xCA, // psrld XMM1,XMM2 0x66, 0x0F,0xD2,0x4D,0xE0, // psrld XMM1,-020h[RBP] 0x0F,0x72,0xD1,0x15, // psrld MM1,015h 0x66, 0x0F,0x72,0xD1,0x15, // psrld XMM1,015h 0x0F,0xD3,0xCA, // psrlq MM1,MM2 0x0F,0xD3,0x4D,0xD8, // psrlq MM1,-028h[RBP] 0x66, 0x0F,0xD3,0xCA, // psrlq XMM1,XMM2 0x66, 0x0F,0xD3,0x4D,0xE0, // psrlq XMM1,-020h[RBP] 0x0F,0x73,0xD1,0x15, // psrlq MM1,015h 0x66, 0x0F,0x73,0xD1,0x15, // psrlq XMM1,015h 0x0F,0xF8,0xCA, // psubb MM1,MM2 0x0F,0xF8,0x4D,0xD8, // psubb MM1,-028h[RBP] 0x66, 0x0F,0xF8,0xCA, // psubb XMM1,XMM2 0x66, 0x0F,0xF8,0x4D,0xE0, // psubb XMM1,-020h[RBP] 0x0F,0xF9,0xCA, // psubw MM1,MM2 0x0F,0xF9,0x4D,0xD8, // psubw MM1,-028h[RBP] 0x66, 0x0F,0xF9,0xCA, // psubw XMM1,XMM2 0x66, 0x0F,0xF9,0x4D,0xE0, // psubw XMM1,-020h[RBP] 0x0F,0xFA,0xCA, // psubd MM1,MM2 0x0F,0xFA,0x4D,0xD8, // psubd MM1,-028h[RBP] 0x66, 0x0F,0xFA,0xCA, // psubd XMM1,XMM2 0x66, 0x0F,0xFA,0x4D,0xE0, // psubd 
XMM1,-020h[RBP] 0x0F,0xFB,0xCA, // psubq MM1,MM2 0x0F,0xFB,0x4D,0xD8, // psubq MM1,-028h[RBP] 0x66, 0x0F,0xFB,0xCA, // psubq XMM1,XMM2 0x66, 0x0F,0xFB,0x4D,0xE0, // psubq XMM1,-020h[RBP] 0x0F,0xE8,0xCA, // psubsb MM1,MM2 0x0F,0xE8,0x4D,0xD8, // psubsb MM1,-028h[RBP] 0x66, 0x0F,0xE8,0xCA, // psubsb XMM1,XMM2 0x66, 0x0F,0xE8,0x4D,0xE0, // psubsb XMM1,-020h[RBP] 0x0F,0xE9,0xCA, // psubsw MM1,MM2 0x0F,0xE9,0x4D,0xD8, // psubsw MM1,-028h[RBP] 0x66, 0x0F,0xE9,0xCA, // psubsw XMM1,XMM2 0x66, 0x0F,0xE9,0x4D,0xE0, // psubsw XMM1,-020h[RBP] 0x0F,0xD8,0xCA, // psubusb MM1,MM2 0x0F,0xD8,0x4D,0xD8, // psubusb MM1,-028h[RBP] 0x66, 0x0F,0xD8,0xCA, // psubusb XMM1,XMM2 0x66, 0x0F,0xD8,0x4D,0xE0, // psubusb XMM1,-020h[RBP] 0x0F,0xD9,0xCA, // psubusw MM1,MM2 0x0F,0xD9,0x4D,0xD8, // psubusw MM1,-028h[RBP] 0x66, 0x0F,0xD9,0xCA, // psubusw XMM1,XMM2 0x66, 0x0F,0xD9,0x4D,0xE0, // psubusw XMM1,-020h[RBP] 0x0F,0x68,0xCA, // punpckhbw MM1,MM2 0x0F,0x68,0x4D,0xD8, // punpckhbw MM1,-028h[RBP] 0x66, 0x0F,0x68,0xCA, // punpckhbw XMM1,XMM2 0x66, 0x0F,0x68,0x4D,0xE0, // punpckhbw XMM1,-020h[RBP] 0x0F,0x69,0xCA, // punpckhwd MM1,MM2 0x0F,0x69,0x4D,0xD8, // punpckhwd MM1,-028h[RBP] 0x66, 0x0F,0x69,0xCA, // punpckhwd XMM1,XMM2 0x66, 0x0F,0x69,0x4D,0xE0, // punpckhwd XMM1,-020h[RBP] 0x0F,0x6A,0xCA, // punpckhdq MM1,MM2 0x0F,0x6A,0x4D,0xD8, // punpckhdq MM1,-028h[RBP] 0x66, 0x0F,0x6A,0xCA, // punpckhdq XMM1,XMM2 0x66, 0x0F,0x6A,0x4D,0xE0, // punpckhdq XMM1,-020h[RBP] 0x66, 0x0F,0x6D,0xCA, // punpckhqdq XMM1,XMM2 0x66, 0x0F,0x6D,0x4D,0xE0, // punpckhqdq XMM1,-020h[RBP] 0x0F,0x60,0xCA, // punpcklbw MM1,MM2 0x0F,0x60,0x4D,0xD8, // punpcklbw MM1,-028h[RBP] 0x66, 0x0F,0x60,0xCA, // punpcklbw XMM1,XMM2 0x66, 0x0F,0x60,0x4D,0xE0, // punpcklbw XMM1,-020h[RBP] 0x0F,0x61,0xCA, // punpcklwd MM1,MM2 0x0F,0x61,0x4D,0xD8, // punpcklwd MM1,-028h[RBP] 0x66, 0x0F,0x61,0xCA, // punpcklwd XMM1,XMM2 0x66, 0x0F,0x61,0x4D,0xE0, // punpcklwd XMM1,-020h[RBP] 0x0F,0x62,0xCA, // punpckldq MM1,MM2 0x0F,0x62,0x4D,0xD8, // 
punpckldq MM1,-028h[RBP] 0x66, 0x0F,0x62,0xCA, // punpckldq XMM1,XMM2 0x66, 0x0F,0x62,0x4D,0xE0, // punpckldq XMM1,-020h[RBP] 0x66, 0x0F,0x6C,0xCA, // punpcklqdq XMM1,XMM2 0x66, 0x0F,0x6C,0x4D,0xE0, // punpcklqdq XMM1,-020h[RBP] 0x0F,0xEF,0xCA, // pxor MM1,MM2 0x0F,0xEF,0x4D,0xD8, // pxor MM1,-028h[RBP] 0x66, 0x0F,0xEF,0xCA, // pxor XMM1,XMM2 0x66, 0x0F,0xEF,0x4D,0xE0, // pxor XMM1,-020h[RBP] 0x0F,0x53,0xCA, // rcpps XMM1,XMM2 0x0F,0x53,0x4D,0xE0, // rcpps XMM1,-020h[RBP] 0xF3,0x0F,0x53,0xCA, // rcpss XMM1,XMM2 0xF3,0x0F,0x53,0x4D,0xD4, // rcpss XMM1,-02Ch[RBP] 0x0F,0x52,0xCA, // rsqrtps XMM1,XMM2 0x0F,0x52,0x4D,0xE0, // rsqrtps XMM1,-020h[RBP] 0xF3,0x0F,0x52,0xCA, // rsqrtss XMM1,XMM2 0xF3,0x0F,0x52,0x4D,0xD4, // rsqrtss XMM1,-02Ch[RBP] 0x66, 0x0F,0xC6,0xCA,0x03, // shufpd XMM1,XMM2,3 0x66, 0x0F,0xC6,0x4D,0xE0,0x04, // shufpd XMM1,-020h[RBP],4 0x0F,0xC6,0xCA,0x03, // shufps XMM1,XMM2,3 0x0F,0xC6,0x4D,0xE0,0x04, // shufps XMM1,-020h[RBP],4 0x66, 0x0F,0x2E,0xE6, // ucimisd XMM4,XMM6 0x66, 0x0F,0x2E,0x6D,0xD8, // ucimisd XMM5,-028h[RBP] 0x0F,0x2E,0xF7, // ucomiss XMM6,XMM7 0x0F,0x2E,0x7D,0xD4, // ucomiss XMM7,-02Ch[RBP] 0x66, 0x0F,0x15,0xE6, // uppckhpd XMM4,XMM6 0x66, 0x0F,0x15,0x6D,0xE0, // uppckhpd XMM5,-020h[RBP] 0x0F,0x15,0xE6, // unpckhps XMM4,XMM6 0x0F,0x15,0x6D,0xE0, // unpckhps XMM5,-020h[RBP] 0x66, 0x0F,0x14,0xE6, // uppcklpd XMM4,XMM6 0x66, 0x0F,0x14,0x6D,0xE0, // uppcklpd XMM5,-020h[RBP] 0x0F,0x14,0xE6, // unpcklps XMM4,XMM6 0x0F,0x14,0x6D,0xE0, // unpcklps XMM5,-020h[RBP] 0x66, 0x0F,0x57,0xCA, // xorpd XMM1,XMM2 0x66, 0x0F,0x57,0x4D,0xE0, // xorpd XMM1,-020h[RBP] 0x0F,0x57,0xCA, // xorps XMM1,XMM2 0x0F,0x57,0x4D,0xE0, // xorps XMM1,-020h[RBP] ]; int i; asm { call L1 ; movmskpd ESI,XMM3 ; movmskps ESI,XMM3 ; movntdq m128[RBP],XMM2 ; movnti m32[RBP],ECX ; movntpd m128[RBP],XMM3 ; movntps m128[RBP],XMM4 ; movntq m64[RBP],MM5 ; movq MM1,MM2 ; movq MM2,m64[RBP] ; movq m64[RBP],MM3 ; movq XMM1,XMM2 ; movq XMM2,m64[RBP] ; movq m64[RBP],XMM3 ; movq2dq XMM3,MM2 ; 
movsd ; movsd XMM1,XMM2 ; movsd XMM3,m64[RBP] ; movsd m64[RBP],XMM4 ; movss XMM1,XMM2 ; movss XMM3,m32[RBP] ; movss m32[RBP],XMM4 ; movupd XMM1,XMM2 ; movupd XMM3,m128[RBP] ; movupd m128[RBP],XMM4 ; movups XMM1,XMM2 ; movups XMM3,m128[RBP] ; movups m128[RBP],XMM4 ; orpd XMM1,XMM2 ; orpd XMM3,m128[RBP] ; orps XMM1,XMM2 ; orps XMM3,m128[RBP] ; packsswb MM1,MM2 ; packsswb MM3,m64[RBP] ; packsswb XMM1,XMM2 ; packsswb XMM3,m128[RBP] ; packssdw MM1,MM2 ; packssdw MM3,m64[RBP] ; packssdw XMM1,XMM2 ; packssdw XMM3,m128[RBP] ; packuswb MM1,MM2 ; packuswb MM3,m64[RBP] ; packuswb XMM1,XMM2 ; packuswb XMM3,m128[RBP] ; paddb MM1,MM2 ; paddb MM3,m64[RBP] ; paddb XMM1,XMM2 ; paddb XMM3,m128[RBP] ; paddw MM1,MM2 ; paddw MM3,m64[RBP] ; paddw XMM1,XMM2 ; paddw XMM3,m128[RBP] ; paddd MM1,MM2 ; paddd MM3,m64[RBP] ; paddd XMM1,XMM2 ; paddd XMM3,m128[RBP] ; paddq MM1,MM2 ; paddq MM3,m64[RBP] ; paddq XMM1,XMM2 ; paddq XMM3,m128[RBP] ; paddsb MM1,MM2 ; paddsb MM3,m64[RBP] ; paddsb XMM1,XMM2 ; paddsb XMM3,m128[RBP] ; paddsw MM1,MM2 ; paddsw MM3,m64[RBP] ; paddsw XMM1,XMM2 ; paddsw XMM3,m128[RBP] ; paddusb MM1,MM2 ; paddusb MM3,m64[RBP] ; paddusb XMM1,XMM2 ; paddusb XMM3,m128[RBP] ; paddusw MM1,MM2 ; paddusw MM3,m64[RBP] ; paddusw XMM1,XMM2 ; paddusw XMM3,m128[RBP] ; pand MM1,MM2 ; pand MM3,m64[RBP] ; pand XMM1,XMM2 ; pand XMM3,m128[RBP] ; pandn MM1,MM2 ; pandn MM3,m64[RBP] ; pandn XMM1,XMM2 ; pandn XMM3,m128[RBP] ; pavgb MM1,MM2 ; pavgb MM3,m64[RBP] ; pavgb XMM1,XMM2 ; pavgb XMM3,m128[RBP] ; pavgw MM1,MM2 ; pavgw MM3,m64[RBP] ; pavgw XMM1,XMM2 ; pavgw XMM3,m128[RBP] ; pcmpeqb MM1,MM2 ; pcmpeqb MM3,m64[RBP] ; pcmpeqb XMM1,XMM2 ; pcmpeqb XMM3,m128[RBP] ; pcmpeqw MM1,MM2 ; pcmpeqw MM3,m64[RBP] ; pcmpeqw XMM1,XMM2 ; pcmpeqw XMM3,m128[RBP] ; pcmpeqd MM1,MM2 ; pcmpeqd MM3,m64[RBP] ; pcmpeqd XMM1,XMM2 ; pcmpeqd XMM3,m128[RBP] ; pcmpgtb MM1,MM2 ; pcmpgtb MM3,m64[RBP] ; pcmpgtb XMM1,XMM2 ; pcmpgtb XMM3,m128[RBP] ; pcmpgtw MM1,MM2 ; pcmpgtw MM3,m64[RBP] ; pcmpgtw XMM1,XMM2 ; pcmpgtw XMM3,m128[RBP] ; 
pcmpgtd MM1,MM2 ; pcmpgtd MM3,m64[RBP] ; pcmpgtd XMM1,XMM2 ; pcmpgtd XMM3,m128[RBP] ; pextrw EDX,MM6,7 ; pextrw EDX,XMM6,7 ; pinsrw MM6,EDX,7 ; pinsrw MM6,m16[RBP],7 ; pinsrw XMM6,EDX,7 ; pinsrw XMM6,m16[RBP],7 ; pmaddwd MM1,MM2 ; pmaddwd MM3,m64[RBP] ; pmaddwd XMM1,XMM2 ; pmaddwd XMM3,m128[RBP] ; pmaxsw MM1,MM2 ; pmaxsw MM3,m64[RBP] ; pmaxsw XMM1,XMM2 ; pmaxsw XMM3,m128[RBP] ; pmaxub MM1,MM2 ; pmaxub MM3,m64[RBP] ; pmaxub XMM1,XMM2 ; pmaxub XMM3,m128[RBP] ; pminsw MM1,MM2 ; pminsw MM3,m64[RBP] ; pminsw XMM1,XMM2 ; pminsw XMM3,m128[RBP] ; pminub MM1,MM2 ; pminub MM3,m64[RBP] ; pminub XMM1,XMM2 ; pminub XMM3,m128[RBP] ; pmovmskb ECX,MM0 ; pmovmskb ECX,XMM6 ; pmulhuw MM1,MM2 ; pmulhuw MM3,m64[RBP] ; pmulhuw XMM1,XMM2 ; pmulhuw XMM3,m128[RBP] ; pmulhw MM1,MM2 ; pmulhw MM3,m64[RBP] ; pmulhw XMM1,XMM2 ; pmulhw XMM3,m128[RBP] ; pmullw MM1,MM2 ; pmullw MM3,m64[RBP] ; pmullw XMM1,XMM2 ; pmullw XMM3,m128[RBP] ; pmuludq MM1,MM2 ; pmuludq MM3,m64[RBP] ; pmuludq XMM1,XMM2 ; pmuludq XMM3,m128[RBP] ; por MM1,MM2 ; por MM3,m64[RBP] ; por XMM1,XMM2 ; por XMM3,m128[RBP] ; prefetcht0 m8[RBP] ; prefetcht1 m8[RBP] ; prefetcht2 m8[RBP] ; prefetchnta m8[RBP] ; prefetchw m8[EBP] ; prefetchwt1 m8[EBP] ; psadbw MM1,MM2 ; psadbw MM3,m64[RBP] ; psadbw XMM1,XMM2 ; psadbw XMM3,m128[RBP] ; pshufd XMM1,XMM2,3 ; pshufd XMM3,m128[RBP],3 ; pshufhw XMM1,XMM2,3 ; pshufhw XMM3,m128[RBP],3 ; pshuflw XMM1,XMM2,3 ; pshuflw XMM3,m128[RBP],3 ; pshufw MM1,MM2,3 ; pshufw MM3,m64[RBP],3 ; pslldq XMM1,0x18 ; psllw MM1,MM2 ; psllw MM1,m64[RBP] ; psllw XMM1,XMM2 ; psllw XMM1,m128[RBP] ; psllw MM1,0x15 ; psllw XMM1,0x15 ; pslld MM1,MM2 ; pslld MM1,m64[RBP] ; pslld XMM1,XMM2 ; pslld XMM1,m128[RBP] ; pslld MM1,0x15 ; pslld XMM1,0x15 ; psllq MM1,MM2 ; psllq MM1,m64[RBP] ; psllq XMM1,XMM2 ; psllq XMM1,m128[RBP] ; psllq MM1,0x15 ; psllq XMM1,0x15 ; psraw MM1,MM2 ; psraw MM1,m64[RBP] ; psraw XMM1,XMM2 ; psraw XMM1,m128[RBP] ; psraw MM1,0x15 ; psraw XMM1,0x15 ; psrad MM1,MM2 ; psrad MM1,m64[RBP] ; psrad XMM1,XMM2 ; 
psrad XMM1,m128[RBP] ; psrad MM1,0x15 ; psrad XMM1,0x15 ; psrldq XMM1,0x18 ; psrlw MM1,MM2 ; psrlw MM1,m64[RBP] ; psrlw XMM1,XMM2 ; psrlw XMM1,m128[RBP] ; psrlw MM1,0x15 ; psrlw XMM1,0x15 ; psrld MM1,MM2 ; psrld MM1,m64[RBP] ; psrld XMM1,XMM2 ; psrld XMM1,m128[RBP] ; psrld MM1,0x15 ; psrld XMM1,0x15 ; psrlq MM1,MM2 ; psrlq MM1,m64[RBP] ; psrlq XMM1,XMM2 ; psrlq XMM1,m128[RBP] ; psrlq MM1,0x15 ; psrlq XMM1,0x15 ; psubb MM1,MM2 ; psubb MM1,m64[RBP] ; psubb XMM1,XMM2 ; psubb XMM1,m128[RBP] ; psubw MM1,MM2 ; psubw MM1,m64[RBP] ; psubw XMM1,XMM2 ; psubw XMM1,m128[RBP] ; psubd MM1,MM2 ; psubd MM1,m64[RBP] ; psubd XMM1,XMM2 ; psubd XMM1,m128[RBP] ; psubq MM1,MM2 ; psubq MM1,m64[RBP] ; psubq XMM1,XMM2 ; psubq XMM1,m128[RBP] ; psubsb MM1,MM2 ; psubsb MM1,m64[RBP] ; psubsb XMM1,XMM2 ; psubsb XMM1,m128[RBP] ; psubsw MM1,MM2 ; psubsw MM1,m64[RBP] ; psubsw XMM1,XMM2 ; psubsw XMM1,m128[RBP] ; psubusb MM1,MM2 ; psubusb MM1,m64[RBP] ; psubusb XMM1,XMM2 ; psubusb XMM1,m128[RBP] ; psubusw MM1,MM2 ; psubusw MM1,m64[RBP] ; psubusw XMM1,XMM2 ; psubusw XMM1,m128[RBP] ; punpckhbw MM1,MM2 ; punpckhbw MM1,m64[RBP] ; punpckhbw XMM1,XMM2 ; punpckhbw XMM1,m128[RBP] ; punpckhwd MM1,MM2 ; punpckhwd MM1,m64[RBP] ; punpckhwd XMM1,XMM2 ; punpckhwd XMM1,m128[RBP] ; punpckhdq MM1,MM2 ; punpckhdq MM1,m64[RBP] ; punpckhdq XMM1,XMM2 ; punpckhdq XMM1,m128[RBP] ; punpckhqdq XMM1,XMM2 ; punpckhqdq XMM1,m128[RBP] ; punpcklbw MM1,MM2 ; punpcklbw MM1,m64[RBP] ; punpcklbw XMM1,XMM2 ; punpcklbw XMM1,m128[RBP] ; punpcklwd MM1,MM2 ; punpcklwd MM1,m64[RBP] ; punpcklwd XMM1,XMM2 ; punpcklwd XMM1,m128[RBP] ; punpckldq MM1,MM2 ; punpckldq MM1,m64[RBP] ; punpckldq XMM1,XMM2 ; punpckldq XMM1,m128[RBP] ; punpcklqdq XMM1,XMM2 ; punpcklqdq XMM1,m128[RBP] ; pxor MM1,MM2 ; pxor MM1,m64[RBP] ; pxor XMM1,XMM2 ; pxor XMM1,m128[RBP] ; rcpps XMM1,XMM2 ; rcpps XMM1,m128[RBP] ; rcpss XMM1,XMM2 ; rcpss XMM1,m32[RBP] ; rsqrtps XMM1,XMM2 ; rsqrtps XMM1,m128[RBP] ; rsqrtss XMM1,XMM2 ; rsqrtss XMM1,m32[RBP] ; shufpd XMM1,XMM2,3 ; 
shufpd XMM1,m128[RBP],4 ;
        shufps XMM1,XMM2,3 ;
        shufps XMM1,m128[RBP],4 ;
        ucomisd XMM4,XMM6 ;
        ucomisd XMM5,m64[RBP] ;
        ucomiss XMM6,XMM7 ;
        ucomiss XMM7,m32[RBP] ;
        unpckhpd XMM4,XMM6 ;
        unpckhpd XMM5,m128[RBP] ;
        unpckhps XMM4,XMM6 ;
        unpckhps XMM5,m128[RBP] ;
        unpcklpd XMM4,XMM6 ;
        unpcklpd XMM5,m128[RBP] ;
        unpcklps XMM4,XMM6 ;
        unpcklps XMM5,m128[RBP] ;
        xorpd XMM1,XMM2 ;
        xorpd XMM1,m128[RBP] ;
        xorps XMM1,XMM2 ;
        xorps XMM1,m128[RBP] ;
L1: ;
        pop RBX ;
        mov p[RBP],RBX ;
    }

    for (i = 0; i < data.length; i++)
    {
        //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], data[i]);
        assert(p[i] == data[i]);
    }
}

/****************************************************/

/* Encoding check for the 3DNow! instructions listed in `data`.
 *
 * Harness: `call L1` pushes the address of the first instruction after the
 * call; at L1 that address is popped into RBX and stored in `p`.  The
 * instructions between the call and L1 are therefore never executed, only
 * assembled, and the loop asserts that the bytes at `p` equal `data`.
 *
 * NOTE(review): the expected bytes contain the RBP displacement of m64
 * (-028h), so the local declarations presumably must keep their current
 * order and types.  m32 and m128 are not referenced by the asm block.
 */
void test15()
{
    int m32;
    long m64;
    M128 m128;
    ubyte *p;
    static ubyte[] data =
    [
        0x0F,0x0F,0xDC,0xBF, // pavgusb MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xBF, // pavgusb MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0x1D, // pf2id MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0x1D, // pf2id MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0xAE, // pfacc MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xAE, // pfacc MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0x9E, // pfadd MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0x9E, // pfadd MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0xB0, // pfcmpeq MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xB0, // pfcmpeq MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0x90, // pfcmpge MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0x90, // pfcmpge MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0xA0, // pfcmpgt MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xA0, // pfcmpgt MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0xA4, // pfmax MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0x94, // pfmin MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0xB4, // pfmul MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xB4, // pfmul MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0x8A, // pfnacc MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0x8E, // pfpnacc MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0x96, // pfrcp MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0x96, // pfrcp MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0xA6, // pfrcpit1 MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xA6, // pfrcpit1 MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0xB6, // pfrcpit2 MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xB6, // pfrcpit2 MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0x97, // pfrsqrt MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xA7, // pfrsqit1 MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0x9A, // pfsub MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0x9A, // pfsub MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0xAA, // pfsubr MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xAA, // pfsubr MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0x0D, // pi2fd MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0x0D, // pi2fd MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0xB7, // pmulhrw MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xB7, // pmulhrw MM3,-028h[RBP]
        0x0F,0x0F,0xDC,0xBB, // pswapd MM3,MM4
        0x0F,0x0F,0x5D,0xD8,0xBB, // pswapd MM3,-028h[RBP]
    ];
    int i;

    asm
    {
        call L1 ;

        pavgusb MM3,MM4 ;
        pavgusb MM3,m64[RBP] ;
        pf2id MM3,MM4 ;
        pf2id MM3,m64[RBP] ;
        pfacc MM3,MM4 ;
        pfacc MM3,m64[RBP] ;
        pfadd MM3,MM4 ;
        pfadd MM3,m64[RBP] ;
        pfcmpeq MM3,MM4 ;
        pfcmpeq MM3,m64[RBP] ;
        pfcmpge MM3,MM4 ;
        pfcmpge MM3,m64[RBP] ;
        pfcmpgt MM3,MM4 ;
        pfcmpgt MM3,m64[RBP] ;
        pfmax MM3,MM4 ;
        pfmin MM3,m64[RBP] ;
        pfmul MM3,MM4 ;
        pfmul MM3,m64[RBP] ;
        pfnacc MM3,MM4 ;
        pfpnacc MM3,m64[RBP] ;
        pfrcp MM3,MM4 ;
        pfrcp MM3,m64[RBP] ;
        pfrcpit1 MM3,MM4 ;
        pfrcpit1 MM3,m64[RBP] ;
        pfrcpit2 MM3,MM4 ;
        pfrcpit2 MM3,m64[RBP] ;
        pfrsqrt MM3,MM4 ;
        pfrsqit1 MM3,m64[RBP] ;
        pfsub MM3,MM4 ;
        pfsub MM3,m64[RBP] ;
        pfsubr MM3,MM4 ;
        pfsubr MM3,m64[RBP] ;
        pi2fd MM3,MM4 ;
        pi2fd MM3,m64[RBP] ;
        pmulhrw MM3,MM4 ;
        pmulhrw MM3,m64[RBP] ;
        pswapd MM3,MM4 ;
        pswapd MM3,m64[RBP] ;

L1: ;
        pop RBX ;
        mov p[RBP],RBX ;
    }

    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

/****************************************************/

// 6-byte operand used by the lgdt/lidt/sgdt/sidt forms in test17.
struct S17 { char[6] x; }

__gshared S17 xx17;

/* Encoding check for lgdt/lidt/sgdt/sidt, using the same call L1 / pop RBX
 * byte-comparison harness as test15.
 *
 * `data` holds only the encodings of the four [RAX] forms (12 bytes).  The
 * four forms that take xx17 are assembled as well, but the loop stops at
 * data.length, so their bytes are not compared.
 */
void test17()
{
    ubyte *p;
    static ubyte[] data =
    [
        0x0F, 0x01, 0x10, // lgdt [RAX]
        0x0F, 0x01, 0x18, // lidt [RAX]
        0x0F, 0x01, 0x00, // sgdt [RAX]
        0x0F, 0x01, 0x08, // sidt [RAX]
    ];
    int i;

    asm
    {
        call L1 ;

        lgdt [RAX] ;
        lidt [RAX] ;
        sgdt [RAX] ;
        sidt [RAX] ;

        lgdt xx17 ;
        lidt xx17 ;
        sgdt xx17 ;
        sidt xx17 ;

L1: pop RBX ;
        mov p[RBP],RBX ;
    }

    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

/****************************************************/

/* Encoding check for fcomi/fcomip/fucomi/fucomip (call L1 / pop RBX harness).
 * Per the table, the operand-less forms are expected to assemble as
 * ST,ST(1) and the one-operand forms as ST,ST(n).
 */
void test18()
{
    ubyte *p;
    static ubyte[] data =
    [
        0xDB, 0xF1, // fcomi ST,ST(1)
        0xDB, 0xF0, // fcomi ST,ST(0)
        0xDB, 0xF2, // fcomi ST,ST(2)
        0xDF, 0xF1, // fcomip ST,ST(1)
        0xDF, 0xF0, // fcomip ST,ST(0)
        0xDF, 0xF2, // fcomip ST,ST(2)
        0xDB, 0xE9, // fucomi ST,ST(1)
        0xDB, 0xE8, // fucomi ST,ST(0)
        0xDB, 0xEB, // fucomi ST,ST(3)
        0xDF, 0xE9, // fucomip ST,ST(1)
        0xDF, 0xED, // fucomip ST,ST(5)
        0xDF, 0xEC, // fucomip ST,ST(4)
    ];
    int i;

    asm
    {
        call L1 ;

        fcomi ;
        fcomi ST(0) ;
        fcomi ST,ST(2) ;
        fcomip ;
        fcomip ST(0) ;
        fcomip ST,ST(2) ;
        fucomi ;
        fucomi ST(0) ;
        fucomi ST,ST(3) ;
        fucomip ;
        fucomip ST(5) ;
        fucomip ST,ST(4) ;

L1: pop RBX ;
        mov p[RBP],RBX ;
    }

    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

/****************************************************/

// Empty extern(C) function whose address test19 takes.
extern (C) { void foo19() { } }

/* Loads the address of foo19 into RAX with lea, stores RAX into a function
 * pointer, a ulong and a ulong pointer, then calls foo19 through `fp` once
 * from the asm block and once from D code.  There are no assertions.
 */
void test19()
{
    void function() fp;
    ulong x;
    ulong *p;

    asm
    {
        lea RAX, qword ptr [foo19];
        mov fp, RAX;
        mov x, RAX;
        mov p, RAX;
        call fp;
    }
    (*fp)();
}

/****************************************************/

// test20 (feni/fdisi/fclex/finit/fsetpm encodings) is disabled via /+ +/.
/+
void test20()
{
    ubyte *p;
    static ubyte[] data =
    [
        0x9B, 0xDB, 0xE0, // feni
        0xDB, 0xE0, // fneni
        0x9B, 0xDB, 0xE1, // fdisi
        0xDB, 0xE1, // fndisi
        0x9B, 0xDB, 0xE2, // fclex
        0xDB, 0xE2, // fnclex
        0x9B, 0xDB, 0xE3, // finit
        0xDB, 0xE3, // fninit
        0xDB, 0xE4, // fsetpm
    ];
    int i;

    asm
    {
        call L1 ;

        feni ;
        fneni ;
        fdisi ;
        fndisi ;
        finit ;
        fninit ;
        fclex ;
        fnclex ;
        finit ;
        fninit ;
        fsetpm ;

L1: pop RBX ;
        mov p[RBP],RBX ;
    }

    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}
+/

/****************************************************/

/* Encoding check for the immediate-port and DX-port forms of in/out at
 * 8-, 16- and 32-bit operand sizes (call L1 / pop RBX harness; the port
 * instructions themselves are jumped over by the call).
 */
void test21()
{
    ubyte *p;
    static ubyte[] data =
    [
        0xE4, 0x06, // in AL,6
        0x66, 0xE5, 0x07, // in AX,7
        0xE5, 0x08, // in EAX,8
        0xEC, // in AL,DX
        0x66, 0xED, // in AX,DX
        0xED, // in EAX,DX
        0xE6, 0x06, // out 6,AL
        0x66, 0xE7, 0x07, // out 7,AX
        0xE7, 0x08, // out 8,EAX
        0xEE, // out DX,AL
        0x66, 0xEF, // out DX,AX
        0xEF, // out DX,EAX
    ];
    int i;

    asm
    {
        call L1 ;

        in AL,6 ;
        in AX,7 ;
        in EAX,8 ;
        in AL,DX ;
        in AX,DX ;
        in EAX,DX ;

        out 6,AL ;
        out 7,AX ;
        out 8,EAX ;
        out DX,AL ;
        out DX,AX ;
        out DX,EAX ;

L1: pop RBX ;
        mov p[RBP],RBX ;
    }

    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

/****************************************************/

/* Encoding check for cmpxchg8b, cmpxchg16b and the byte form of cmpxchg with
 * DIL (call L1 / pop RBX harness).
 *
 * NOTE(review): the expected displacements (0xE0 for m64, 0xF0 for m128)
 * presumably depend on the declaration order of the locals below.
 */
void test22()
{
    ubyte *p;
    static ubyte[] data =
    [
        0x0F, 0xC7, 0x4D, 0xE0, // cmpxchg8b
        0x48, 0x0F, 0xC7, 0x4D, 0xF0, // cmpxchg16b
        0x40, 0x0F, 0xB0, 0x3A // cmpxchg [RDX],DIL
    ];
    int i;
    M64 m64;
    M128 m128;

    asm
    {
        call L1 ;

        cmpxchg8b m64 ;
        cmpxchg16b m128 ;
        cmpxchg [RDX],DIL ;

L1: pop RBX ;
        mov p[RBP],RBX ;
    }

    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

/****************************************************/

/* Encoding check for fxch, fistp/fisttp at three operand sizes,
 * monitor/mwait/xgetbv and the addsub/hadd/hsub/lddqu/movddup/movshdup/
 * movsldup instructions (call L1 / pop RBX harness).
 *
 * NOTE(review): the table encodes the RBP displacements of m16, m32, m64 and
 * m128, so the local declarations presumably must not be reordered.
 */
void test23()
{
    short m16;
    int m32;
    long m64;
    M128 m128;
    ubyte *p;
    static ubyte[] data =
    [
        0xD9, 0xC9, // fxch ST(1), ST(0)
        0xDF, 0x5D, 0xD0, // fistp word ptr -030h[RBP]
        0xDB, 0x5D, 0xD4, // fistp dword ptr -02Ch[RBP]
        0xDF, 0x7D, 0xD8, // fistp long64 ptr -028h[RBP]
        0xDF, 0x4D, 0xD0, // fisttp short ptr -030h[RBP]
        0xDB, 0x4D, 0xD4, // fisttp dword ptr -02Ch[RBP]
        0xDD, 0x4D, 0xD8, // fisttp long64 ptr -028h[RBP]
        0x0F, 0x01, 0xC8, // monitor
        0x0F, 0x01, 0xC9, // mwait
        0x0F, 0x01, 0xD0, // xgetbv

        0x66, 0x0F, 0xD0, 0xCA, // addsubpd XMM1,XMM2
        0x66, 0x0F, 0xD0, 0x4D, 0xE0, // addsubpd XMM1,-020h[RBP]
        0xF2, 0x0F, 0xD0, 0xCA, // addsubps XMM1,XMM2
        0xF2, 0x0F, 0xD0, 0x4D, 0xE0, // addsubps XMM1,-020h[RBP]
        0x66, 0x0F, 0x7C, 0xCA, // haddpd XMM1,XMM2
        0x66, 0x0F, 0x7C, 0x4D, 0xE0, // haddpd XMM1,-020h[RBP]
        0xF2, 0x0F, 0x7C, 0xCA, // haddps XMM1,XMM2
        0xF2, 0x0F, 0x7C, 0x4D, 0xE0, // haddps XMM1,-020h[RBP]
        0x66, 0x0F, 0x7D, 0xCA, // hsubpd XMM1,XMM2
        0x66, 0x0F, 0x7D, 0x4D, 0xE0, // hsubpd XMM1,-020h[RBP]
        0xF2, 0x0F, 0x7D, 0xCA, // hsubps XMM1,XMM2
        0xF2, 0x0F, 0x7D, 0x4D, 0xE0, // hsubps XMM1,-020h[RBP]
        0xF2, 0x0F, 0xF0, 0x4D, 0xE0, // lddqu XMM1,-020h[RBP]
        0xF2, 0x0F, 0x12, 0xCA, // movddup XMM1,XMM2
        0xF2, 0x0F, 0x12, 0x4D, 0xD8, // movddup XMM1,-028h[RBP]
        0xF3, 0x0F, 0x16, 0xCA, // movshdup XMM1,XMM2
        0xF3, 0x0F, 0x16, 0x4D, 0xE0, // movshdup XMM1,-020h[RBP]
        0xF3, 0x0F, 0x12, 0xCA, // movsldup XMM1,XMM2
        0xF3, 0x0F, 0x12, 0x4D, 0xE0, // movsldup XMM1,-020h[RBP]
    ];
    int i;

    asm
    {
        call L1 ;

        fxch ST(1), ST(0) ;
        fistp m16[RBP] ;
        fistp m32[RBP] ;
        fistp m64[RBP] ;
        fisttp m16[RBP] ;
        fisttp m32[RBP] ;
        fisttp m64[RBP] ;
        monitor ;
        mwait ;
        xgetbv ;

        addsubpd XMM1,XMM2 ;
        addsubpd XMM1,m128[RBP] ;
        addsubps XMM1,XMM2 ;
        addsubps XMM1,m128[RBP] ;
        haddpd XMM1,XMM2 ;
        haddpd XMM1,m128[RBP] ;
        haddps XMM1,XMM2 ;
        haddps XMM1,m128[RBP] ;
        hsubpd XMM1,XMM2 ;
        hsubpd XMM1,m128[RBP] ;
        hsubps XMM1,XMM2 ;
        hsubps XMM1,m128[RBP] ;
        lddqu XMM1,m128[RBP] ;
        movddup XMM1,XMM2 ;
        movddup XMM1,m64[RBP] ;
        movshdup XMM1,XMM2 ;
        movshdup XMM1,m128[RBP] ;
        movsldup XMM1,XMM2 ;
        movsldup XMM1,m128[RBP] ;

L1: ;
        pop RBX ;
        mov p[RBP],RBX ;
    }

    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

/****************************************************/

/* 16-bit lea: stores the low 16 bits of the address of `i` into `i` itself
 * and asserts the stored value equals cast(ushort)&i.
 */
void test24()
{
    ushort i;

    asm
    {
        lea AX, i;
        mov i, AX;
    }
    assert(cast(ushort)&i == i);
}

/****************************************************/

/* Encoding check for movd between 32-bit general registers and MMX/XMM
 * registers, in both directions (call L1 / pop RBX harness).  The first half
 * of the table varies the vector register against ECX; the second half
 * varies the general register against XMM1/MM1.  m16, m32, m64 and m128 are
 * not referenced by the asm block.
 */
void test25()
{
    short m16;
    int m32;
    long m64;
    M128 m128;
    ubyte *p;
    static ubyte[] data =
    [
        0x66, 0x0F, 0x7E, 0xC1, // movd ECX,XMM0
        0x66, 0x0F, 0x7E, 0xC9, // movd ECX,XMM1
        0x66, 0x0F, 0x7E, 0xD1, // movd ECX,XMM2
        0x66, 0x0F, 0x7E, 0xD9, // movd ECX,XMM3
        0x66, 0x0F, 0x7E, 0xE1, // movd ECX,XMM4
        0x66, 0x0F, 0x7E, 0xE9, // movd ECX,XMM5
        0x66, 0x0F, 0x7E, 0xF1, // movd ECX,XMM6
        0x66, 0x0F, 0x7E, 0xF9, // movd ECX,XMM7
        0x0F, 0x7E, 0xC1, // movd ECX,MM0
        0x0F, 0x7E, 0xC9, // movd ECX,MM1
        0x0F, 0x7E, 0xD1, // movd ECX,MM2
        0x0F, 0x7E, 0xD9, // movd ECX,MM3
        0x0F, 0x7E, 0xE1, // movd ECX,MM4
        0x0F, 0x7E, 0xE9, // movd ECX,MM5
        0x0F, 0x7E, 0xF1, // movd ECX,MM6
        0x0F, 0x7E, 0xF9, // movd ECX,MM7
        0x66, 0x0F, 0x6E, 0xC1, // movd XMM0,ECX
        0x66, 0x0F, 0x6E, 0xC9, // movd XMM1,ECX
        0x66, 0x0F, 0x6E, 0xD1, // movd XMM2,ECX
        0x66, 0x0F, 0x6E, 0xD9, // movd XMM3,ECX
        0x66, 0x0F, 0x6E, 0xE1, // movd XMM4,ECX
        0x66, 0x0F, 0x6E, 0xE9, // movd XMM5,ECX
        0x66, 0x0F, 0x6E, 0xF1, // movd XMM6,ECX
        0x66, 0x0F, 0x6E, 0xF9, // movd XMM7,ECX
        0x0F, 0x6E, 0xC1, // movd MM0,ECX
        0x0F, 0x6E, 0xC9, // movd MM1,ECX
        0x0F, 0x6E, 0xD1, // movd MM2,ECX
        0x0F, 0x6E, 0xD9, // movd MM3,ECX
        0x0F, 0x6E, 0xE1, // movd MM4,ECX
        0x0F, 0x6E, 0xE9, // movd MM5,ECX
        0x0F, 0x6E, 0xF1, // movd MM6,ECX
        0x0F, 0x6E, 0xF9, // movd MM7,ECX
        0x66, 0x0F, 0x7E, 0xC8, // movd EAX,XMM1
        0x66, 0x0F, 0x7E, 0xCB, // movd EBX,XMM1
        0x66, 0x0F, 0x7E, 0xC9, // movd ECX,XMM1
        0x66, 0x0F, 0x7E, 0xCA, // movd EDX,XMM1
        0x66, 0x0F, 0x7E, 0xCE, // movd ESI,XMM1
        0x66, 0x0F, 0x7E, 0xCF, // movd EDI,XMM1
        0x66, 0x0F, 0x7E, 0xCD, // movd EBP,XMM1
        0x66, 0x0F, 0x7E, 0xCC, // movd ESP,XMM1
        0x0F, 0x7E, 0xC8, // movd EAX,MM1
        0x0F, 0x7E, 0xCB, // movd EBX,MM1
        0x0F, 0x7E, 0xC9, // movd ECX,MM1
        0x0F, 0x7E, 0xCA, // movd EDX,MM1
        0x0F, 0x7E, 0xCE, // movd ESI,MM1
        0x0F, 0x7E, 0xCF, // movd EDI,MM1
        0x0F, 0x7E, 0xCD, // movd EBP,MM1
        0x0F, 0x7E, 0xCC, // movd ESP,MM1
        0x66, 0x0F, 0x6E, 0xC8, // movd XMM1,EAX
        0x66, 0x0F, 0x6E, 0xCB, // movd XMM1,EBX
        0x66, 0x0F, 0x6E, 0xC9, // movd XMM1,ECX
        0x66, 0x0F, 0x6E, 0xCA, // movd XMM1,EDX
        0x66, 0x0F, 0x6E, 0xCE, // movd XMM1,ESI
        0x66, 0x0F, 0x6E, 0xCF, // movd XMM1,EDI
        0x66, 0x0F, 0x6E, 0xCD, // movd XMM1,EBP
        0x66, 0x0F, 0x6E, 0xCC, // movd XMM1,ESP
        0x0F, 0x6E, 0xC8, // movd MM1,EAX
        0x0F, 0x6E, 0xCB, // movd MM1,EBX
        0x0F, 0x6E, 0xC9, // movd MM1,ECX
        0x0F, 0x6E, 0xCA, // movd MM1,EDX
        0x0F, 0x6E, 0xCE, // movd MM1,ESI
        0x0F, 0x6E, 0xCF, // movd MM1,EDI
        0x0F, 0x6E, 0xCD, // movd MM1,EBP
        0x0F, 0x6E, 0xCC, // movd MM1,ESP
    ];
    int i;

    asm
    {
        call L1 ;

        movd ECX, XMM0;
        movd ECX, XMM1;
        movd ECX, XMM2;
        movd ECX, XMM3;
        movd ECX, XMM4;
        movd ECX, XMM5;
        movd ECX, XMM6;
        movd ECX, XMM7;

        movd ECX, MM0;
        movd ECX, MM1;
        movd ECX, MM2;
        movd ECX, MM3;
        movd ECX, MM4;
        movd ECX, MM5;
        movd ECX, MM6;
        movd ECX, MM7;

        movd XMM0, ECX;
        movd XMM1, ECX;
        movd XMM2, ECX;
        movd XMM3, ECX;
        movd XMM4, ECX;
        movd XMM5, ECX;
        movd XMM6, ECX;
        movd XMM7, ECX;

        movd MM0, ECX;
        movd MM1, ECX;
        movd MM2, ECX;
        movd MM3, ECX;
        movd MM4, ECX;
        movd MM5, ECX;
        movd MM6, ECX;
        movd MM7, ECX;

        movd EAX, XMM1;
        movd EBX, XMM1;
        movd ECX, XMM1;
        movd EDX, XMM1;
        movd ESI, XMM1;
        movd EDI, XMM1;
        movd EBP, XMM1;
        movd ESP, XMM1;

        movd EAX, MM1;
        movd EBX, MM1;
        movd ECX, MM1;
        movd EDX, MM1;
        movd ESI, MM1;
        movd EDI, MM1;
        movd EBP, MM1;
        movd ESP, MM1;

        movd XMM1, EAX;
        movd XMM1, EBX;
        movd XMM1, ECX;
        movd XMM1, EDX;
        movd XMM1, ESI;
        movd XMM1, EDI;
        movd XMM1, EBP;
        movd XMM1, ESP;

        movd MM1, EAX;
        movd MM1, EBX;
        movd MM1, ECX;
        movd MM1, EDX;
        movd MM1, ESI;
        movd MM1, EDI;
        movd MM1, EBP;
        movd MM1, ESP;

L1: ;
        pop RBX ;
        mov p[RBP],RBX ;
    }

    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

/****************************************************/

/* Increments the byte passed by reference: the asm loads `val` into RAX and
 * uses it as the address for `inc byte ptr`, i.e. a ref parameter is treated
 * as a pointer inside the asm block.
 */
void fn26(ref byte val)
{
    asm
    {
        mov RAX, val;
        inc byte ptr [RAX];
    }
}

// Checks that fn26 changes the caller's variable from 0 to 1.
void test26()
{
    byte b;

    //printf( "%i\n", b );
    assert(b == 0);
    fn26(b);
    //printf( "%i\n", b );
    assert(b == 1);
}

/****************************************************/

/* On Windows only: loads the 16-byte static array `a` into XMM0 with movdqu
 * and shifts it with pslldq.  There are no assertions; on other platforms
 * the function body contains only the array declaration.
 */
void test27()
{
    static const ubyte[16] a =
    [0, 1, 2, 3, 4, 5, 6, 7, 8 ,9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF];

    version (Windows)
    {
        asm
        {
            movdqu XMM0, a;
            pslldq XMM0, 2;
        }
    }
}

/****************************************************/

/*
PASS:
    cfloat z;
    cfloat[1] z;
    double z;
    double[1] b;
    long z;
    long[1] z;

FAIL: (bad type/size of operands 'movq')
    byte[8] z;
    char[8] z;
    dchar[2] z;
    float[2] z;
    int[2] z;
    short[4] z;
    wchar[4] z;

XPASS: (too small, but accepted by DMD)
    cfloat[0] z;
    double[0] z;
    long[0] z;
*/

/* movq with array operands: MM0 is loaded first from `z` (left
 * uninitialised) and then from the 8-byte static array A, and is stored to
 * `b`.  The loop fails the test if any byte of b differs from A.
 */
void test28()
{
//    version (Windows)
//    {
        c_complex_float[4] z = void;
        static const ubyte[8] A = [3, 4, 9, 0, 1, 3, 7, 2];
        ubyte[8] b;

        asm{
            movq MM0, z;
            movq MM0, A;
            movq b, MM0;
        }

        for(size_t i = 0; i < A.length; i++)
        {
            if(A[i] != b[i])
            {
                assert(0);
            }
        }
//    }
}

/****************************************************/

// test29 (offsetof on a shared array) is disabled via /+ +/.
/+
shared int[5] bar29 = [3, 4, 5, 6, 7];

void test29()
{
    int* x;

    asm
    {
        push offsetof bar29;
        pop EAX;
        mov x, EAX;
    }
    assert(*x == 3);

    asm
    {
        mov EAX, offsetof bar29;
        mov x, EAX;
    }
    assert(*x == 3);
}
+/

/****************************************************/

const int CONST_OFFSET30 = 10;

/* Uses the module-level constant CONST_OFFSET30 as a displacement in a
 * memory operand.  Nothing in the visible code calls foo30 (test30 is
 * empty), so this is presumably a compile-only check.
 */
void foo30()
{
    asm
    {
        mov EDX, 10;
        mov EAX, [RDX + CONST_OFFSET30];
    }
}

// Intentionally empty.
void test30()
{
}

/****************************************************/ void test31() { ubyte *p; static ubyte[] data = [ 0xF7, 0xD8, // neg EAX 0x74, 0x04, // je L8 0xF7, 0xD8, // neg EAX 0x75, 0xFC, // jne L4 0xFF, 0xC0, // inc EAX ]; int i; asm { call L1 ; neg EAX; je L2; L3: neg EAX; jne L3; L2: inc EAX; L1: ; pop RBX ; mov p[RBP],RBX ; } for (i = 0; i < data.length; i++) { assert(p[i] == data[i]); } } /****************************************************/ void infiniteAsmLoops() { /* This crashes DMD 0.162: */ for (;;) asm { inc EAX; } /* It doesn't seem to matter what you use. These all crash: */ //for (;;) asm { mov EAX, EBX; } //for (;;) asm { xor EAX, EAX; } //for (;;) asm { push 0; pop RAX; } //for (;;) asm { jmp infiniteAsmLoops; } /* This is a workaround: */ for (bool a = true; a;) asm { hlt; } // compiles /* But this isn't: */ //for (const bool a = true; a;) asm{ hlt; } // crashes DMD /* It's not restricted to for-statements: */ //while(1) asm { hlt; } // crashes DMD /* This compiles: */ { bool a = true; while(a) asm { hlt; } } /* But again, this doesn't: */ /* { const bool a = true; // note the const while(a) asm { hlt; } } //*/ //do { asm { hlt; } } while (1); // crashes DMD /* This, of course, compiles: */ { bool a = true; do asm { hlt; } while (a); } /* But predicably, this doesn't: */ /* { const bool a = true; do asm { hlt; } while (a); } //**/ /* Not even hand-coding the loop works: */ /* { label: asm { hlt; } // commenting out this line to make it compile goto label; } //*/ /* Unless you go all the way: (i.e. this compiles) */ asm { L1: hlt; jmp L1; } /* or like this (also compiles): */ static void test() { asm { naked; hlt; jmp test; } } test(); /* Wait... 
it gets weirder: */ /* This also doesn't compile: */ /* for (;;) { printf("\n"); asm { hlt; } } //*/ /* But this does: */ //* for (;;) { asm { hlt; } printf("\n"); } //*/ /* The same loop that doesn't compile above * /does/ compile after previous one: */ //* for (;;) { printf("\n"); asm { hlt; } } //*/ /* Note: this one is at the end because it seems to also trigger the * "now it works" event of the loop above. */ /* There has to be /something/ in that asm block: */ for (;;) asm {} // compiles } void test32() { } /****************************************************/ void test33() { int x = 1; alias x y; asm { mov EAX, x; mov EAX, y; } } /****************************************************/ int test34() { asm{ jmp label; } return 0; label: return 1; } /****************************************************/ /+ void foo35() { printf("hello\n"); } void test35() { void function() p; ulong q; asm { mov ECX, foo35 ; mov q, ECX ; lea EDX, foo35 ; mov p, EDX ; } assert(p == &foo35); assert(q == *cast(ulong *)p); } /****************************************************/ void func36() { } int test36() { void* a = &func36; ulong* b = cast(ulong*) a; ulong f = *b; ulong g; asm{ mov RAX, func36; mov g, RAX; } if(f != g){ assert(0); } } +/ /****************************************************/ void a37(X...)(X expr) { alias expr[0] var1; asm { fld double ptr expr[0]; fstp double ptr var1; } } void test37() { a37(3.6); } /****************************************************/ int f38(X...)(X x) { asm { mov EAX, int ptr x[1]; } } int g38(X...)(X x) { asm { mov EAX, x[1]; } } void test38() { assert(456 == f38(123, 456)); assert(456 == g38(123, 456)); } /****************************************************/ void test39() { const byte z = 35; goto end; asm { db z; } end: ; } /****************************************************/ void test40() { printf(""); const string s = "abcdefghi"; asm { jmp L1; ds s; L1:; } end: ; } /****************************************************/ void 
test41() { ubyte *p; static ubyte[] data = [ 0x66,0x0F,0x28,0x0C,0x06, // movapd XMM1,[RAX][RSI] 0x66,0x0F,0x28,0x0C,0x06, // movapd XMM1,[RAX][RSI] 0x66,0x0F,0x28,0x0C,0x46, // movapd XMM1,[RAX*2][RSI] 0x66,0x0F,0x28,0x0C,0x86, // movapd XMM1,[RAX*4][RSI] 0x66,0x0F,0x28,0x0C,0xC6, // movapd XMM1,[RAX*8][RSI] ]; int i; asm { call L1 ; movapd XMM1, [RSI+RAX]; movapd XMM1, [RSI+1*RAX]; movapd XMM1, [RSI+2*RAX]; movapd XMM1, [RSI+4*RAX]; movapd XMM1, [RSI+8*RAX]; L1: ; pop RBX ; mov p[RBP],RBX ; } for (i = 0; i < data.length; i++) { assert(p[i] == data[i]); } } /****************************************************/ /* test42: a member of an anonymous enum is used as an asm operand; there is no run-time assertion. */ enum { enumeration42 = 1, } void test42() { asm { mov EAX, enumeration42; } } /****************************************************/ /* foo43/test43: lea with constant arithmetic (0*4, 4*0, 0, 7*7) inside the address expression. test43 itself is empty and does not call foo43. */ void foo43() { asm {lea EAX, [0*4+EAX]; } asm {lea EAX, [4*0+EAX]; } asm {lea EAX, [EAX+4*0]; } asm {lea EAX, [0+EAX]; } asm {lea EAX, [7*7+EAX]; } } void test43() { } /****************************************************/ /* retN1/retN2/test44: a manifest constant (enum n1 = 42) and an anonymous-enum member (n2) are each moved into EAX; test44 asserts both functions return 42. The trailing comments appear to record that n1 was once assembled as a stack load rather than an immediate. */ enum n1 = 42; enum { n2 = 42 } uint retN1() { asm { mov EAX,n1; // No! 
- mov EAX,-4[EBP] } } uint retN2() { asm { mov EAX,n2; // OK - mov EAX,02Ah } } /* test44: asserts that retN1 and retN2 both return 42. */ void test44() { assert(retN1() == 42); assert(retN2() == 42); } /****************************************************/ /* test45: encodings of the fcmovcc instructions with ST(0)..ST(7) source operands. */ void test45() { ubyte *p; static ubyte[] data = [ 0xDA, 0xC0, // fcmovb ST(0) 0xDA, 0xC1, // fcmovb ST(1) 0xDA, 0xCA, // fcmove ST(2) 0xDA, 0xD3, // fcmovbe ST(3) 0xDA, 0xDC, // fcmovu ST(4) 0xDB, 0xC5, // fcmovnb ST(5) 0xDB, 0xCE, // fcmovne ST(6) 0xDB, 0xD7, // fcmovnbe ST(7) 0xDB, 0xD9, // fcmovnu ST(1) ]; int i; asm { call L1 ; fcmovb ST, ST(0); fcmovb ST, ST(1); fcmove ST, ST(2); fcmovbe ST, ST(3); fcmovu ST, ST(4); fcmovnb ST, ST(5); fcmovne ST, ST(6); fcmovnbe ST, ST(7); fcmovnu ST, ST(1); L1: ; pop RBX ; mov p[RBP],RBX ; } for (i = 0; i < data.length; i++) { assert(p[i] == data[i]); } } /****************************************************/ /* test46: encodings of dppd/dpps with an 8-bit immediate and of movmskpd/movmskps/pmovmskb into 32-bit general registers. */ void test46() { ubyte *p; static ubyte[] data = [ 0x66, 0x0F, 0x3A, 0x41, 0xCA, 0x08, // dppd XMM1,XMM2,8 0x66, 0x0F, 0x3A, 0x40, 0xDC, 0x07, // dpps XMM3,XMM4,7 0x66, 0x0F, 0x50, 0xF3, // movmskpd ESI,XMM3 0x66, 0x0F, 0x50, 0xC7, // movmskpd EAX,XMM7 0x0F, 0x50, 0xC7, // movmskps EAX,XMM7 0x0F, 0xD7, 0xC7, // pmovmskb EAX,MM7 0x66, 0x0F, 0xD7, 0xC7, // pmovmskb EAX,XMM7 ]; int i; asm { call L1 ; dppd XMM1,XMM2,8 ; dpps XMM3,XMM4,7 ; movmskpd ESI,XMM3 ; movmskpd EAX,XMM7 ; movmskps EAX,XMM7 ; pmovmskb EAX,MM7 ; pmovmskb EAX,XMM7 ; L1: ; pop RBX ; mov p[RBP],RBX ; } for (i = 0; i < data.length; i++) { assert(p[i] == data[i]); } } /****************************************************/ /* test47 is disabled by the nesting comment that follows. */ /+ struct Foo47 { float x,y; } void bar47(Foo47 f) { int i; asm { mov EAX, offsetof f; mov i, EAX; } printf("%d\n",i); assert(i == 8); } void test47() { Foo47 f; bar47(f); } +/ /****************************************************/ /* func48/test48: an asm block inside a delegate literal; func48 invokes the delegate it is given. */ void func48(void delegate () callback) { callback(); } void test48() { func48(() { asm{ mov EAX,EAX; } }); } /****************************************************/ /* test49: register-to-register add for all 64 destination/source pairings within each of the 8-bit, 16-bit and 32-bit legacy register sets. */ void test49() { ubyte *p; static ubyte[] data = [ 0x00, 0xC0, // 
add AL,AL 0x00, 0xD8, // add AL,BL 0x00, 0xC8, // add AL,CL 0x00, 0xD0, // add AL,DL 0x00, 0xE0, // add AL,AH 0x00, 0xF8, // add AL,BH 0x00, 0xE8, // add AL,CH 0x00, 0xF0, // add AL,DH 0x00, 0xC4, // add AH,AL 0x00, 0xDC, // add AH,BL 0x00, 0xCC, // add AH,CL 0x00, 0xD4, // add AH,DL 0x00, 0xE4, // add AH,AH 0x00, 0xFC, // add AH,BH 0x00, 0xEC, // add AH,CH 0x00, 0xF4, // add AH,DH 0x00, 0xC3, // add BL,AL 0x00, 0xDB, // add BL,BL 0x00, 0xCB, // add BL,CL 0x00, 0xD3, // add BL,DL 0x00, 0xE3, // add BL,AH 0x00, 0xFB, // add BL,BH 0x00, 0xEB, // add BL,CH 0x00, 0xF3, // add BL,DH 0x00, 0xC7, // add BH,AL 0x00, 0xDF, // add BH,BL 0x00, 0xCF, // add BH,CL 0x00, 0xD7, // add BH,DL 0x00, 0xE7, // add BH,AH 0x00, 0xFF, // add BH,BH 0x00, 0xEF, // add BH,CH 0x00, 0xF7, // add BH,DH 0x00, 0xC1, // add CL,AL 0x00, 0xD9, // add CL,BL 0x00, 0xC9, // add CL,CL 0x00, 0xD1, // add CL,DL 0x00, 0xE1, // add CL,AH 0x00, 0xF9, // add CL,BH 0x00, 0xE9, // add CL,CH 0x00, 0xF1, // add CL,DH 0x00, 0xC5, // add CH,AL 0x00, 0xDD, // add CH,BL 0x00, 0xCD, // add CH,CL 0x00, 0xD5, // add CH,DL 0x00, 0xE5, // add CH,AH 0x00, 0xFD, // add CH,BH 0x00, 0xED, // add CH,CH 0x00, 0xF5, // add CH,DH 0x00, 0xC2, // add DL,AL 0x00, 0xDA, // add DL,BL 0x00, 0xCA, // add DL,CL 0x00, 0xD2, // add DL,DL 0x00, 0xE2, // add DL,AH 0x00, 0xFA, // add DL,BH 0x00, 0xEA, // add DL,CH 0x00, 0xF2, // add DL,DH 0x00, 0xC6, // add DH,AL 0x00, 0xDE, // add DH,BL 0x00, 0xCE, // add DH,CL 0x00, 0xD6, // add DH,DL 0x00, 0xE6, // add DH,AH 0x00, 0xFE, // add DH,BH 0x00, 0xEE, // add DH,CH 0x00, 0xF6, // add DH,DH 0x66, 0x01, 0xC0, // add AX,AX 0x66, 0x01, 0xD8, // add AX,BX 0x66, 0x01, 0xC8, // add AX,CX 0x66, 0x01, 0xD0, // add AX,DX 0x66, 0x01, 0xF0, // add AX,SI 0x66, 0x01, 0xF8, // add AX,DI 0x66, 0x01, 0xE8, // add AX,BP 0x66, 0x01, 0xE0, // add AX,SP 0x66, 0x01, 0xC3, // add BX,AX 0x66, 0x01, 0xDB, // add BX,BX 0x66, 0x01, 0xCB, // add BX,CX 0x66, 0x01, 0xD3, // add BX,DX 0x66, 0x01, 0xF3, // add BX,SI 0x66, 0x01, 
0xFB, // add BX,DI 0x66, 0x01, 0xEB, // add BX,BP 0x66, 0x01, 0xE3, // add BX,SP 0x66, 0x01, 0xC1, // add CX,AX 0x66, 0x01, 0xD9, // add CX,BX 0x66, 0x01, 0xC9, // add CX,CX 0x66, 0x01, 0xD1, // add CX,DX 0x66, 0x01, 0xF1, // add CX,SI 0x66, 0x01, 0xF9, // add CX,DI 0x66, 0x01, 0xE9, // add CX,BP 0x66, 0x01, 0xE1, // add CX,SP 0x66, 0x01, 0xC2, // add DX,AX 0x66, 0x01, 0xDA, // add DX,BX 0x66, 0x01, 0xCA, // add DX,CX 0x66, 0x01, 0xD2, // add DX,DX 0x66, 0x01, 0xF2, // add DX,SI 0x66, 0x01, 0xFA, // add DX,DI 0x66, 0x01, 0xEA, // add DX,BP 0x66, 0x01, 0xE2, // add DX,SP 0x66, 0x01, 0xC6, // add SI,AX 0x66, 0x01, 0xDE, // add SI,BX 0x66, 0x01, 0xCE, // add SI,CX 0x66, 0x01, 0xD6, // add SI,DX 0x66, 0x01, 0xF6, // add SI,SI 0x66, 0x01, 0xFE, // add SI,DI 0x66, 0x01, 0xEE, // add SI,BP 0x66, 0x01, 0xE6, // add SI,SP 0x66, 0x01, 0xC7, // add DI,AX 0x66, 0x01, 0xDF, // add DI,BX 0x66, 0x01, 0xCF, // add DI,CX 0x66, 0x01, 0xD7, // add DI,DX 0x66, 0x01, 0xF7, // add DI,SI 0x66, 0x01, 0xFF, // add DI,DI 0x66, 0x01, 0xEF, // add DI,BP 0x66, 0x01, 0xE7, // add DI,SP 0x66, 0x01, 0xC5, // add BP,AX 0x66, 0x01, 0xDD, // add BP,BX 0x66, 0x01, 0xCD, // add BP,CX 0x66, 0x01, 0xD5, // add BP,DX 0x66, 0x01, 0xF5, // add BP,SI 0x66, 0x01, 0xFD, // add BP,DI 0x66, 0x01, 0xED, // add BP,BP 0x66, 0x01, 0xE5, // add BP,SP 0x66, 0x01, 0xC4, // add SP,AX 0x66, 0x01, 0xDC, // add SP,BX 0x66, 0x01, 0xCC, // add SP,CX 0x66, 0x01, 0xD4, // add SP,DX 0x66, 0x01, 0xF4, // add SP,SI 0x66, 0x01, 0xFC, // add SP,DI 0x66, 0x01, 0xEC, // add SP,BP 0x66, 0x01, 0xE4, // add SP,SP 0x01, 0xC0, // add EAX,EAX 0x01, 0xD8, // add EAX,EBX 0x01, 0xC8, // add EAX,ECX 0x01, 0xD0, // add EAX,EDX 0x01, 0xF0, // add EAX,ESI 0x01, 0xF8, // add EAX,EDI 0x01, 0xE8, // add EAX,EBP 0x01, 0xE0, // add EAX,ESP 0x01, 0xC3, // add EBX,EAX 0x01, 0xDB, // add EBX,EBX 0x01, 0xCB, // add EBX,ECX 0x01, 0xD3, // add EBX,EDX 0x01, 0xF3, // add EBX,ESI 0x01, 0xFB, // add EBX,EDI 0x01, 0xEB, // add EBX,EBP 0x01, 0xE3, // add 
EBX,ESP 0x01, 0xC1, // add ECX,EAX 0x01, 0xD9, // add ECX,EBX 0x01, 0xC9, // add ECX,ECX 0x01, 0xD1, // add ECX,EDX 0x01, 0xF1, // add ECX,ESI 0x01, 0xF9, // add ECX,EDI 0x01, 0xE9, // add ECX,EBP 0x01, 0xE1, // add ECX,ESP 0x01, 0xC2, // add EDX,EAX 0x01, 0xDA, // add EDX,EBX 0x01, 0xCA, // add EDX,ECX 0x01, 0xD2, // add EDX,EDX 0x01, 0xF2, // add EDX,ESI 0x01, 0xFA, // add EDX,EDI 0x01, 0xEA, // add EDX,EBP 0x01, 0xE2, // add EDX,ESP 0x01, 0xC6, // add ESI,EAX 0x01, 0xDE, // add ESI,EBX 0x01, 0xCE, // add ESI,ECX 0x01, 0xD6, // add ESI,EDX 0x01, 0xF6, // add ESI,ESI 0x01, 0xFE, // add ESI,EDI 0x01, 0xEE, // add ESI,EBP 0x01, 0xE6, // add ESI,ESP 0x01, 0xC7, // add EDI,EAX 0x01, 0xDF, // add EDI,EBX 0x01, 0xCF, // add EDI,ECX 0x01, 0xD7, // add EDI,EDX 0x01, 0xF7, // add EDI,ESI 0x01, 0xFF, // add EDI,EDI 0x01, 0xEF, // add EDI,EBP 0x01, 0xE7, // add EDI,ESP 0x01, 0xC5, // add EBP,EAX 0x01, 0xDD, // add EBP,EBX 0x01, 0xCD, // add EBP,ECX 0x01, 0xD5, // add EBP,EDX 0x01, 0xF5, // add EBP,ESI 0x01, 0xFD, // add EBP,EDI 0x01, 0xED, // add EBP,EBP 0x01, 0xE5, // add EBP,ESP 0x01, 0xC4, // add ESP,EAX 0x01, 0xDC, // add ESP,EBX 0x01, 0xCC, // add ESP,ECX 0x01, 0xD4, // add ESP,EDX 0x01, 0xF4, // add ESP,ESI 0x01, 0xFC, // add ESP,EDI 0x01, 0xEC, // add ESP,EBP 0x01, 0xE4, // add ESP,ESP ]; int i; asm { call L1 ; add AL,AL ; add AL,BL ; add AL,CL ; add AL,DL ; add AL,AH ; add AL,BH ; add AL,CH ; add AL,DH ; add AH,AL ; add AH,BL ; add AH,CL ; add AH,DL ; add AH,AH ; add AH,BH ; add AH,CH ; add AH,DH ; add BL,AL ; add BL,BL ; add BL,CL ; add BL,DL ; add BL,AH ; add BL,BH ; add BL,CH ; add BL,DH ; add BH,AL ; add BH,BL ; add BH,CL ; add BH,DL ; add BH,AH ; add BH,BH ; add BH,CH ; add BH,DH ; add CL,AL ; add CL,BL ; add CL,CL ; add CL,DL ; add CL,AH ; add CL,BH ; add CL,CH ; add CL,DH ; add CH,AL ; add CH,BL ; add CH,CL ; add CH,DL ; add CH,AH ; add CH,BH ; add CH,CH ; add CH,DH ; add DL,AL ; add DL,BL ; add DL,CL ; add DL,DL ; add DL,AH ; add DL,BH ; add DL,CH ; add DL,DH 
; add DH,AL ; add DH,BL ; add DH,CL ; add DH,DL ; add DH,AH ; add DH,BH ; add DH,CH ; add DH,DH ; add AX,AX ; add AX,BX ; add AX,CX ; add AX,DX ; add AX,SI ; add AX,DI ; add AX,BP ; add AX,SP ; add BX,AX ; add BX,BX ; add BX,CX ; add BX,DX ; add BX,SI ; add BX,DI ; add BX,BP ; add BX,SP ; add CX,AX ; add CX,BX ; add CX,CX ; add CX,DX ; add CX,SI ; add CX,DI ; add CX,BP ; add CX,SP ; add DX,AX ; add DX,BX ; add DX,CX ; add DX,DX ; add DX,SI ; add DX,DI ; add DX,BP ; add DX,SP ; add SI,AX ; add SI,BX ; add SI,CX ; add SI,DX ; add SI,SI ; add SI,DI ; add SI,BP ; add SI,SP ; add DI,AX ; add DI,BX ; add DI,CX ; add DI,DX ; add DI,SI ; add DI,DI ; add DI,BP ; add DI,SP ; add BP,AX ; add BP,BX ; add BP,CX ; add BP,DX ; add BP,SI ; add BP,DI ; add BP,BP ; add BP,SP ; add SP,AX ; add SP,BX ; add SP,CX ; add SP,DX ; add SP,SI ; add SP,DI ; add SP,BP ; add SP,SP ; add EAX,EAX ; add EAX,EBX ; add EAX,ECX ; add EAX,EDX ; add EAX,ESI ; add EAX,EDI ; add EAX,EBP ; add EAX,ESP ; add EBX,EAX ; add EBX,EBX ; add EBX,ECX ; add EBX,EDX ; add EBX,ESI ; add EBX,EDI ; add EBX,EBP ; add EBX,ESP ; add ECX,EAX ; add ECX,EBX ; add ECX,ECX ; add ECX,EDX ; add ECX,ESI ; add ECX,EDI ; add ECX,EBP ; add ECX,ESP ; add EDX,EAX ; add EDX,EBX ; add EDX,ECX ; add EDX,EDX ; add EDX,ESI ; add EDX,EDI ; add EDX,EBP ; add EDX,ESP ; add ESI,EAX ; add ESI,EBX ; add ESI,ECX ; add ESI,EDX ; add ESI,ESI ; add ESI,EDI ; add ESI,EBP ; add ESI,ESP ; add EDI,EAX ; add EDI,EBX ; add EDI,ECX ; add EDI,EDX ; add EDI,ESI ; add EDI,EDI ; add EDI,EBP ; add EDI,ESP ; add EBP,EAX ; add EBP,EBX ; add EBP,ECX ; add EBP,EDX ; add EBP,ESI ; add EBP,EDI ; add EBP,EBP ; add EBP,ESP ; add ESP,EAX ; add ESP,EBX ; add ESP,ECX ; add ESP,EDX ; add ESP,ESI ; add ESP,EDI ; add ESP,EBP ; add ESP,ESP ; L1: ; pop RBX ; mov p[RBP],RBX ; } for (i = 0; i < data.length; i++) { assert(p[i] == data[i]); } } /****************************************************/ /* test50: encodings of a broad sample of legacy instructions: flag and string operations, mul/div forms, port I/O, the conditional-jump and loop mnemonic aliases (all targeting label L10), rep prefixes, shifts and rotates by 1, endbr32/endbr64, and an lea of a code label. Opcodes whose rows are commented out in data are replaced by nops in the asm block. */ void test50() { ubyte *p; static ubyte[] data = [ 0x66, 0x98, // cbw 0xF8, // clc 
0xFC, // cld 0xFA, // cli 0xF5, // cmc 0xA6, // cmpsb 0x66, 0xA7, // cmpsw 0xA7, // cmpsd 0x66, 0x99, // cwd // 0x27, // daa // 0x2F, // das 0xFF, 0xC8, // dec EAX 0xF6, 0xF1, // div CL 0x66, 0xF7, 0xF3, // div BX 0xF7, 0xF2, // div EDX 0xF4, // hlt 0xF6, 0xFB, // idiv BL 0x66, 0xF7, 0xFA, // idiv DX 0xF7, 0xFE, // idiv ESI 0xF6, 0xEB, // imul BL 0x66, 0xF7, 0xEA, // imul DX 0xF7, 0xEE, // imul ESI 0xEC, // in AL,DX 0x66, 0xED, // in AX,DX 0xFF, 0xC3, // inc EBX 0xCC, // int 3 0xCD, 0x67, // int 067h // 0xCE, // into 0x66, 0xCF, // iret 0x48, 0xCF, // iretq 0x90, 0x90, // nop;nop - put instructions above this or L10 changes // address, which changes all the jump // instructions and breaks the test case. 0x77, 0xFC, // ja L30 0x77, 0xFA, // ja L30 0x73, 0xF8, // jae L30 0x73, 0xF6, // jae L30 0x73, 0xF4, // jae L30 0x72, 0xF2, // jb L30 0x72, 0xF0, // jb L30 0x76, 0xEE, // jbe L30 0x76, 0xEC, // jbe L30 0x72, 0xEA, // jb L30 // 0x67, 0xE3, 0xE7, // jcxz L30 0x90, 0x90, 0x90, // nop;nop;nop 0x74, 0xE5, // je L30 0x74, 0xE3, // je L30 0x7F, 0xE1, // jg L30 0x7F, 0xDF, // jg L30 0x7D, 0xDD, // jge L30 0x7D, 0xDB, // jge L30 0x7C, 0xD9, // jl L30 0x7C, 0xD7, // jl L30 0x7E, 0xD5, // jle L30 0x7E, 0xD3, // jle L30 0xEB, 0xD1, // jmp short L30 0x75, 0xCF, // jne L30 0x75, 0xCD, // jne L30 0x71, 0xCB, // jno L30 0x79, 0xC9, // jns L30 0x7B, 0xC7, // jnp L30 0x7B, 0xC5, // jnp L30 0x70, 0xC3, // jo L30 0x7A, 0xC1, // jp L30 0x7A, 0xBF, // jp L30 0x78, 0xBD, // js L30 0x9F, // lahf // 0xC5, 0x30, // lds ESI,[EAX] 0x90, 0x90, // nop;nop 0x8B, 0xFB, // mov EDI,EBX // 0xC4, 0x29, // les EBP,[ECX] 0x90, 0x90, // nop;nop 0xF0, // lock 0xAC, // lodsb 0x66, 0xAD, // lodsw 0xAD, // lodsd 0xE2, 0xAF, // loop L30 0xE1, 0xAD, // loope L30 0xE1, 0xAB, // loope L30 0xE0, 0xA9, // loopne L30 0xE0, 0xA7, // loopne L30 0xA4, // movsb 0x66, 0xA5, // movsw 0xA5, // movsd 0xF6, 0xE4, // mul AH 0x66, 0xF7, 0xE1, // mul CX 0xF7, 0xE5, // mul EBP 0x90, // nop 0xF7, 0xD7, // not EDI 0x66, 0xE7, 
0x44, // out 044h,AX 0xEE, // out DX,AL 0x66, 0x9D, // popf 0x66, 0x9C, // pushf 0xD1, 0xDB, // rcr EBX,1 0xF3, // rep 0xF3, // rep 0xF2, // repne 0xF3, // rep 0xF2, // repne 0xC3, // ret 0xC2, 0x04, 0x00, // ret 4 0xD1, 0xC1, // rol ECX,1 0xD1, 0xCA, // ror EDX,1 0x9E, // sahf 0xD1, 0xE5, // shl EBP,1 0xD1, 0xE4, // shl ESP,1 0xD1, 0xFF, // sar EDI,1 0xAE, // scasb 0x66, 0xAF, // scasw 0xAF, // scasd 0xD1, 0xEE, // shr ESI,1 0xFD, // std 0xF9, // stc 0xFB, // sti 0xAA, // stosb 0x66, 0xAB, // stosw 0xAB, // stosd 0x9B, // wait 0x91, // xchg EAX,ECX 0xD7, // xlat 0xF3, 0x0F, 0x1E, 0xFB, // endbr32 0xF3, 0x0F, 0x1E, 0xFA, // endbr64 0x48, 0x8D, 0x1D, 0x02, 0x00, 0x00, 0x00, // lea RBX,L1; 0x89, 0xC0, // mov EAX,EAX ]; int i; asm { call L1 ; cbw ; clc ; cld ; cli ; cmc ; cmpsb ; cmpsw ; cmpsd ; cwd ; //daa ; //das ; dec EAX ; div CL ; div BX ; div EDX ; hlt ; idiv BL ; idiv DX ; idiv ESI ; imul BL ; imul DX ; imul ESI ; in AL,DX ; in AX,DX ; inc EBX ; int 3 ; int 0x67 ; //into ; iret ; iretq ; L10: nop; nop; // put instructions above this or L10 changes // address, which changes all the jump instructions // and breaks the test case. 
ja L10 ; jnbe L10 ; jae L10 ; jnb L10 ; jnc L10 ; jb L10 ; jnae L10 ; jbe L10 ; jna L10 ; jc L10 ; nop;nop;nop; // jcxz L10; je L10 ; jz L10 ; jg L10 ; jnle L10 ; jge L10 ; jnl L10 ; jl L10 ; jnge L10 ; jle L10 ; jng L10 ; jmp short L10 ; jne L10 ; jnz L10 ; jno L10 ; jns L10 ; jnp L10 ; jpo L10 ; jo L10 ; jp L10 ; jpe L10 ; js L10 ; lahf ; nop;nop; //lds ESI,[EAX]; lea EDI,[EBX]; nop;nop; //les EBP,[ECX]; lock ; lodsb ; lodsw ; lodsd ; loop L10 ; loope L10 ; loopz L10 ; loopnz L10 ; loopne L10 ; movsb ; movsw ; movsd ; mul AH ; mul CX ; mul EBP ; nop ; not EDI ; out 0x44,AX ; out DX,AL ; popf ; pushf ; rcr EBX,1 ; rep ; repe ; repne ; repz ; repnz ; ret ; ret 4 ; rol ECX,1 ; ror EDX,1 ; sahf ; sal EBP,1 ; shl ESP,1 ; sar EDI,1 ; scasb ; scasw ; scasd ; shr ESI,1 ; std ; stc ; sti ; stosb ; stosw ; stosd ; wait ; xchg EAX,ECX ; xlat ; endbr32 ; endbr64 ; lea RBX,L1 ; mov EAX,EAX ; L1: ; pop RBX ; mov p[RBP],RBX ; } for (i = 0; i < data.length; i++) { //printf("[%d] %02x %02x\n", i, p[i], data[i]); assert(p[i] == data[i]); } } /****************************************************/ /* Test51: "this" used as an asm operand inside a member function; nothing in this section instantiates the class. */ class Test51 { void test(int n) { asm { mov RAX, this; } } } /****************************************************/ /* test52: encodings of neg for 8/16/32/64-bit registers, REX-extended registers (R8, R11, R12B/W/D), the stack local x, and byte-pointer memory operands written with both EBX and RBX. The rows for module-level operands are commented out in both data and the asm block. */ void test52() { int x; ubyte* p; static ubyte[] data = [ 0xF6, 0xD8, // neg AL 0x66, 0xF7, 0xD8, // neg AX 0xF7, 0xD8, // neg EAX 0x48, 0xF7, 0xD8, // neg RAX 0xF6, 0xDC, // neg AH 0x41, 0xF6, 0xDC, // neg R12B 0x66, 0x41, 0xF7, 0xDC, // neg R12W 0x41, 0xF7, 0xDC, // neg R12D 0x49, 0xF7, 0xDB, // neg R11 // 0xF6, 0x1D, 0x00, 0x00, 0x00, 0x00, // neg byte ptr _D6iasm641bg@PC32[RIP] //0x66, 0xF7, 0x1D, 0x00, 0x00, 0x00, 0x00, // neg word ptr _D6iasm641ws@PC32[RIP] // 0xF7, 0x1D, 0x00, 0x00, 0x00, 0x00, // neg dword ptr _D6iasm641ii@PC32[RIP] // 0x48, 0xF7, 0x1D, 0x00, 0x00, 0x00, 0x00, // neg qword ptr _D6iasm641ll@PC32[RIP] 0xF7, 0x5D, 0xD0, // neg dword ptr -0x30[RBP] 0xF6, 0x1B, // neg byte ptr [RBX] 0xF6, 0x1B, // neg byte ptr [RBX] 0x49, 0xF7, 0xD8, // neg R8 ]; asm { call L1 
; neg AL ; neg AX ; neg EAX ; neg RAX ; neg AH ; neg R12B ; neg R12W ; neg R12D ; neg R11 ; // neg b ; // neg w ; // neg i ; // neg l ; neg x ; neg byte ptr [EBX] ; neg byte ptr [RBX] ; neg R8 ; L1: pop RAX ; mov p[RBP],RAX ; } foreach (ref i, b; data) { //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b); assert(p[i] == b); } } /****************************************************/ /* test53: encodings of lea RAX with two-register address expressions over the general registers, covering the REX prefix needed for R8..R15 in either position, the explicit zero disp8 emitted when the SIB base is RBP or R13, disp8 versus disp32 selection, and the SIB byte required for a lone RSP or R12 base. */ void test53() { int x; ubyte* p; static ubyte[] data = [ 0x48, 0x8D, 0x04, 0x00, // lea RAX,[RAX][RAX] 0x48, 0x8D, 0x04, 0x08, // lea RAX,[RCX][RAX] 0x48, 0x8D, 0x04, 0x10, // lea RAX,[RDX][RAX] 0x48, 0x8D, 0x04, 0x18, // lea RAX,[RBX][RAX] 0x48, 0x8D, 0x04, 0x28, // lea RAX,[RBP][RAX] 0x48, 0x8D, 0x04, 0x30, // lea RAX,[RSI][RAX] 0x48, 0x8D, 0x04, 0x38, // lea RAX,[RDI][RAX] 0x4A, 0x8D, 0x04, 0x00, // lea RAX,[R8][RAX] 0x4A, 0x8D, 0x04, 0x08, // lea RAX,[R9][RAX] 0x4A, 0x8D, 0x04, 0x10, // lea RAX,[R10][RAX] 0x4A, 0x8D, 0x04, 0x18, // lea RAX,[R11][RAX] 0x4A, 0x8D, 0x04, 0x20, // lea RAX,[R12][RAX] 0x4A, 0x8D, 0x04, 0x28, // lea RAX,[R13][RAX] 0x4A, 0x8D, 0x04, 0x30, // lea RAX,[R14][RAX] 0x4A, 0x8D, 0x04, 0x38, // lea RAX,[R15][RAX] 0x48, 0x8D, 0x04, 0x00, // lea RAX,[RAX][RAX] 0x48, 0x8D, 0x04, 0x01, // lea RAX,[RAX][RCX] 0x48, 0x8D, 0x04, 0x02, // lea RAX,[RAX][RDX] 0x48, 0x8D, 0x04, 0x03, // lea RAX,[RAX][RBX] 0x48, 0x8D, 0x04, 0x04, // lea RAX,[RAX][RSP] 0x48, 0x8D, 0x44, 0x05, 0x00, // lea RAX,0[RAX][RBP] 0x48, 0x8D, 0x04, 0x06, // lea RAX,[RAX][RSI] 0x48, 0x8D, 0x04, 0x07, // lea RAX,[RAX][RDI] 0x49, 0x8D, 0x04, 0x00, // lea RAX,[RAX][R8] 0x49, 0x8D, 0x04, 0x01, // lea RAX,[RAX][R9] 0x49, 0x8D, 0x04, 0x02, // lea RAX,[RAX][R10] 0x49, 0x8D, 0x04, 0x03, // lea RAX,[RAX][R11] 0x49, 0x8D, 0x04, 0x04, // lea RAX,[RAX][R12] 0x49, 0x8D, 0x44, 0x05, 0x00, // lea RAX,0[RAX][R13] 0x49, 0x8D, 0x04, 0x06, // lea RAX,[RAX][R14] 0x49, 0x8D, 0x04, 0x07, // lea RAX,[RAX][R15] 0x4B, 0x8D, 0x04, 0x24, // lea RAX,[R12][R12] 0x4B, 0x8D, 0x44, 0x25, 0x00, // lea RAX,0[R12][R13] 0x4B, 0x8D, 
0x04, 0x26, // lea RAX,[R12][R14] 0x4B, 0x8D, 0x04, 0x2C, // lea RAX,[R13][R12] 0x4B, 0x8D, 0x44, 0x2D, 0x00, // lea RAX,0[R13][R13] 0x4B, 0x8D, 0x04, 0x2E, // lea RAX,[R13][R14] 0x4B, 0x8D, 0x04, 0x34, // lea RAX,[R14][R12] 0x4B, 0x8D, 0x44, 0x35, 0x00, // lea RAX,0[R14][R13] 0x4B, 0x8D, 0x04, 0x36, // lea RAX,[R14][R14] 0x48, 0x8D, 0x44, 0x01, 0x12, // lea RAX,012h[RAX][RCX] 0x48, 0x8D, 0x84, 0x01, 0x34, 0x12, 0x00, 0x00, // lea RAX,01234h[RAX][RCX] 0x48, 0x8D, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, // lea RAX,012345678h[RAX][RCX] 0x48, 0x8D, 0x44, 0x05, 0x12, // lea RAX,012h[RAX][RBP] 0x48, 0x8D, 0x84, 0x05, 0x34, 0x12, 0x00, 0x00, // lea RAX,01234h[RAX][RBP] 0x48, 0x8D, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, // lea RAX,012345678h[RAX][RBP] 0x49, 0x8D, 0x44, 0x05, 0x12, // lea RAX,012h[RAX][R13] 0x49, 0x8D, 0x84, 0x05, 0x34, 0x12, 0x00, 0x00, // lea RAX,01234h[RAX][R13] 0x49, 0x8D, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, // lea RAX,012345678h[RAX][R13] 0x48, 0x8D, 0x04, 0x24, // lea RAX,[RSP] 0x49, 0x8D, 0x04, 0x24, // lea RAX,[R12] ]; asm { call L1 ; // Right lea RAX, [RAX+RAX]; lea RAX, [RAX+RCX]; lea RAX, [RAX+RDX]; lea RAX, [RAX+RBX]; //lea RAX, [RAX+RSP]; RSP can't be on the right lea RAX, [RAX+RBP]; lea RAX, [RAX+RSI]; lea RAX, [RAX+RDI]; lea RAX, [RAX+R8]; lea RAX, [RAX+R9]; lea RAX, [RAX+R10]; lea RAX, [RAX+R11]; lea RAX, [RAX+R12]; lea RAX, [RAX+R13]; lea RAX, [RAX+R14]; lea RAX, [RAX+R15]; // Left lea RAX, [RAX+RAX]; lea RAX, [RCX+RAX]; lea RAX, [RDX+RAX]; lea RAX, [RBX+RAX]; lea RAX, [RSP+RAX]; lea RAX, [RBP+RAX]; // Good gets disp+8 correctly lea RAX, [RSI+RAX]; lea RAX, [RDI+RAX]; lea RAX, [R8+RAX]; lea RAX, [R9+RAX]; lea RAX, [R10+RAX]; lea RAX, [R11+RAX]; lea RAX, [R12+RAX]; lea RAX, [R13+RAX]; // Good disp+8 lea RAX, [R14+RAX]; lea RAX, [R15+RAX]; // Right and Left lea RAX, [R12+R12]; lea RAX, [R13+R12]; lea RAX, [R14+R12]; lea RAX, [R12+R13]; lea RAX, [R13+R13]; lea RAX, [R14+R13]; lea RAX, [R12+R14]; lea RAX, [R13+R14]; lea RAX, [R14+R14]; // Disp8/32 
checks lea RAX, [RCX+RAX+0x12]; lea RAX, [RCX+RAX+0x1234]; lea RAX, [RCX+RAX+0x1234_5678]; lea RAX, [RBP+RAX+0x12]; lea RAX, [RBP+RAX+0x1234]; lea RAX, [RBP+RAX+0x1234_5678]; lea RAX, [R13+RAX+0x12]; lea RAX, [R13+RAX+0x1234]; lea RAX, [R13+RAX+0x1234_5678]; lea RAX, [RSP]; lea RAX, [R12]; L1: pop RAX ; mov p[RBP],RAX ; } foreach (ref i, b; data) { //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b); assert(p[i] == b); } } /****************************************************/ /* test54: encodings of dec/inc for AL, AH, AX, EAX, RAX and R10, and of shld/shrd at 16/32/64-bit operand sizes with an R8-family source and either an immediate or CL shift count. */ void test54() { int x; ubyte* p; static ubyte[] data = [ 0xFE, 0xC8, // dec AL 0xFE, 0xCC, // dec AH 0x66, 0xFF, 0xC8, // dec AX 0xFF, 0xC8, // dec EAX 0x48, 0xFF, 0xC8, // dec RAX 0x49, 0xFF, 0xCA, // dec R10 0xFE, 0xC0, // inc AL 0xFE, 0xC4, // inc AH 0x66, 0xFF, 0xC0, // inc AX 0xFF, 0xC0, // inc EAX 0x48, 0xFF, 0xC0, // inc RAX 0x49, 0xFF, 0xC2, // inc R10 0x66, 0x44, 0x0F, 0xA4, 0xC0, 0x04, // shld AX, R8W, 4 0x66, 0x44, 0x0F, 0xA5, 0xC0, // shld AX, R8W, CL 0x44, 0x0F, 0xA4, 0xC0, 0x04, // shld EAX, R8D, 4 0x44, 0x0F, 0xA5, 0xC0, // shld EAX, R8D, CL 0x4C, 0x0F, 0xA4, 0xC0, 0x04, // shld RAX, R8 , 4 0x4C, 0x0F, 0xA5, 0xC0, // shld RAX, R8 , CL 0x66, 0x44, 0x0F, 0xAC, 0xC0, 0x04, // shrd AX, R8W, 4 0x66, 0x44, 0x0F, 0xAD, 0xC0, // shrd AX, R8W, CL 0x44, 0x0F, 0xAC, 0xC0, 0x04, // shrd EAX, R8D, 4 0x44, 0x0F, 0xAD, 0xC0, // shrd EAX, R8D, CL 0x4C, 0x0F, 0xAC, 0xC0, 0x04, // shrd RAX, R8 , 4 0x4C, 0x0F, 0xAD, 0xC0 // shrd RAX, R8 , CL ]; asm { call L1; dec AL; dec AH; dec AX; dec EAX; dec RAX; dec R10; inc AL; inc AH; inc AX; inc EAX; inc RAX; inc R10; shld AX, R8W, 4; shld AX, R8W, CL; shld EAX, R8D, 4; shld EAX, R8D, CL; shld RAX, R8 , 4; shld RAX, R8 , CL; shrd AX, R8W, 4; shrd AX, R8W, CL; shrd EAX, R8D, 4; shrd EAX, R8D, CL; shrd RAX, R8 , 4; shrd RAX, R8 , CL; L1: pop RAX; mov p[RBP],RAX; } foreach (ref i, b; data) { //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b); assert(p[i] == b); } } /****************************************************/ /* test55: short (rel8) versus near (rel32) encodings of conditional and unconditional jumps, selected by the distance to labels Lb, Lc and Le, which are spaced apart with blocks of NOP bytes. */ void test55() { 
int x; ubyte* p; enum NOP = 0x9090_9090_9090_9090; static ubyte[] data = [ 0x0F, 0x87, 0xFF, 0xFF, 0, 0, // ja $ + 0xFFFF 0x72, 0x18, // jb Lb 0x0F, 0x82, 0x92, 0x00, 0, 0, // jc Lc 0x0F, 0x84, 0x0C, 0x01, 0, 0, // je Le 0xEB, 0x0A, // jmp Lb 0xE9, 0x85, 0x00, 0x00, 0, // jmp Lc 0xE9, 0x00, 0x01, 0x00, 0, // jmp Le ]; asm { call L1; ja $+0x0_FFFF; jb Lb; jc Lc; je Le; jmp Lb; jmp Lc; jmp Le; Lb: dq NOP,NOP,NOP,NOP; // 32 dq NOP,NOP,NOP,NOP; // 64 dq NOP,NOP,NOP,NOP; // 96 dq NOP,NOP,NOP,NOP; // 128 Lc: dq NOP,NOP,NOP,NOP; // 160 dq NOP,NOP,NOP,NOP; // 192 dq NOP,NOP,NOP,NOP; // 224 dq NOP,NOP,NOP,NOP; // 256 Le: nop; L1: pop RAX; mov p[RBP],RAX; } foreach (ref i, b; data) { //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b); assert(p[i] == b); } } /****************************************************/ /* test56/foo56/bar56: a conditional jump from one naked function into another. foo56 zeroes EAX (setting ZF) and takes jz to bar56, which loads 42 and executes ret; test56 asserts the value it receives is 42. */ void test56() { int x; x = foo56(); assert(x == 42); } int foo56() { asm { naked; xor EAX,EAX; jz bar56; ret; } } void bar56() { asm { naked; mov EAX, 42; ret; } } /****************************************************/ /* ======================= SSSE3 ======================= */ /* test57: SSSE3 instruction encodings, each in four forms: MMX register source, XMM register source, and the stack locals m64 and m128 as memory sources. The expected RBP displacements in data depend on the order and sizes of the locals declared here. */ void test57() { ubyte* p; M64 m64; M128 m128; static ubyte[] data = [ 0x0F, 0x3A, 0x0F, 0xCA, 0x03, // palignr MM1, MM2, 3 0x66, 0x0F, 0x3A, 0x0F, 0xCA, 0x03, // palignr XMM1, XMM2, 3 0x0F, 0x3A, 0x0F, 0x5D, 0xC8, 0x03, // palignr MM3, -0x38[RBP], 3 0x66, 0x0F, 0x3A, 0x0F, 0x5D, 0xD0, 0x03, // palignr XMM3, -0x30[RBP], 3 0x0F, 0x38, 0x02, 0xCA, // phaddd MM1, MM2 0x66, 0x0F, 0x38, 0x02, 0xCA, // phaddd XMM1, XMM2 0x0F, 0x38, 0x02, 0x5D, 0xC8, // phaddd MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x02, 0x5D, 0xD0, // phaddd XMM3, -0x30[RBP] 0x0F, 0x38, 0x01, 0xCA, // phaddw MM1, MM2 0x66, 0x0F, 0x38, 0x01, 0xCA, // phaddw XMM1, XMM2 0x0F, 0x38, 0x01, 0x5D, 0xC8, // phaddw MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x01, 0x5D, 0xD0, // phaddw XMM3, -0x30[RBP] 0x0F, 0x38, 0x03, 0xCA, // phaddsw MM1, MM2 0x66, 0x0F, 0x38, 0x03, 0xCA, // phaddsw XMM1, XMM2 0x0F, 0x38, 0x03, 0x5D, 0xC8, // phaddsw MM3, 
-0x38[RBP] 0x66, 0x0F, 0x38, 0x03, 0x5D, 0xD0, // phaddsw XMM3, -0x30[RBP] 0x0F, 0x38, 0x06, 0xCA, // phsubd MM1, MM2 0x66, 0x0F, 0x38, 0x06, 0xCA, // phsubd XMM1, XMM2 0x0F, 0x38, 0x06, 0x5D, 0xC8, // phsubd MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x06, 0x5D, 0xD0, // phsubd XMM3, -0x30[RBP] 0x0F, 0x38, 0x05, 0xCA, // phsubw MM1, MM2 0x66, 0x0F, 0x38, 0x05, 0xCA, // phsubw XMM1, XMM2 0x0F, 0x38, 0x05, 0x5D, 0xC8, // phsubw MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x05, 0x5D, 0xD0, // phsubw XMM3, -0x30[RBP] 0x0F, 0x38, 0x07, 0xCA, // phsubsw MM1, MM2 0x66, 0x0F, 0x38, 0x07, 0xCA, // phsubsw XMM1, XMM2 0x0F, 0x38, 0x07, 0x5D, 0xC8, // phsubsw MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x07, 0x5D, 0xD0, // phsubsw XMM3, -0x30[RBP] 0x0F, 0x38, 0x04, 0xCA, // pmaddubsw MM1, MM2 0x66, 0x0F, 0x38, 0x04, 0xCA, // pmaddubsw XMM1, XMM2 0x0F, 0x38, 0x04, 0x5D, 0xC8, // pmaddubsw MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x04, 0x5D, 0xD0, // pmaddubsw XMM3, -0x30[RBP] 0x0F, 0x38, 0x0B, 0xCA, // pmulhrsw MM1, MM2 0x66, 0x0F, 0x38, 0x0B, 0xCA, // pmulhrsw XMM1, XMM2 0x0F, 0x38, 0x0B, 0x5D, 0xC8, // pmulhrsw MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x0B, 0x5D, 0xD0, // pmulhrsw XMM3, -0x30[RBP] 0x0F, 0x38, 0x00, 0xCA, // pshufb MM1, MM2 0x66, 0x0F, 0x38, 0x00, 0xCA, // pshufb XMM1, XMM2 0x0F, 0x38, 0x00, 0x5D, 0xC8, // pshufb MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x00, 0x5D, 0xD0, // pshufb XMM3, -0x30[RBP] 0x0F, 0x38, 0x1C, 0xCA, // pabsb MM1, MM2 0x66, 0x0F, 0x38, 0x1C, 0xCA, // pabsb XMM1, XMM2 0x0F, 0x38, 0x1C, 0x5D, 0xC8, // pabsb MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x1C, 0x5D, 0xD0, // pabsb XMM3, -0x30[RBP] 0x0F, 0x38, 0x1E, 0xCA, // pabsd MM1, MM2 0x66, 0x0F, 0x38, 0x1E, 0xCA, // pabsd XMM1, XMM2 0x0F, 0x38, 0x1E, 0x5D, 0xC8, // pabsd MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x1E, 0x5D, 0xD0, // pabsd XMM3, -0x30[RBP] 0x0F, 0x38, 0x1D, 0xCA, // pabsw MM1, MM2 0x66, 0x0F, 0x38, 0x1D, 0xCA, // pabsw XMM1, XMM2 0x0F, 0x38, 0x1D, 0x5D, 0xC8, // pabsw MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x1D, 0x5D, 0xD0, // pabsw XMM3, 
-0x30[RBP] 0x0F, 0x38, 0x08, 0xCA, // psignb MM1, MM2 0x66, 0x0F, 0x38, 0x08, 0xCA, // psignb XMM1, XMM2 0x0F, 0x38, 0x08, 0x5D, 0xC8, // psignb MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x08, 0x5D, 0xD0, // psignb XMM3, -0x30[RBP] 0x0F, 0x38, 0x0A, 0xCA, // psignd MM1, MM2 0x66, 0x0F, 0x38, 0x0A, 0xCA, // psignd XMM1, XMM2 0x0F, 0x38, 0x0A, 0x5D, 0xC8, // psignd MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x0A, 0x5D, 0xD0, // psignd XMM3, -0x30[RBP] 0x0F, 0x38, 0x09, 0xCA, // psignw MM1, MM2 0x66, 0x0F, 0x38, 0x09, 0xCA, // psignw XMM1, XMM2 0x0F, 0x38, 0x09, 0x5D, 0xC8, // psignw MM3, -0x38[RBP] 0x66, 0x0F, 0x38, 0x09, 0x5D, 0xD0, // psignw XMM3, -0x30[RBP] ]; asm { call L1; palignr MM1, MM2, 3; palignr XMM1, XMM2, 3; palignr MM3, m64 , 3; palignr XMM3, m128, 3; phaddd MM1, MM2; phaddd XMM1, XMM2; phaddd MM3, m64; phaddd XMM3, m128; phaddw MM1, MM2; phaddw XMM1, XMM2; phaddw MM3, m64; phaddw XMM3, m128; phaddsw MM1, MM2; phaddsw XMM1, XMM2; phaddsw MM3, m64; phaddsw XMM3, m128; phsubd MM1, MM2; phsubd XMM1, XMM2; phsubd MM3, m64; phsubd XMM3, m128; phsubw MM1, MM2; phsubw XMM1, XMM2; phsubw MM3, m64; phsubw XMM3, m128; phsubsw MM1, MM2; phsubsw XMM1, XMM2; phsubsw MM3, m64; phsubsw XMM3, m128; pmaddubsw MM1, MM2; pmaddubsw XMM1, XMM2; pmaddubsw MM3, m64; pmaddubsw XMM3, m128; pmulhrsw MM1, MM2; pmulhrsw XMM1, XMM2; pmulhrsw MM3, m64; pmulhrsw XMM3, m128; pshufb MM1, MM2; pshufb XMM1, XMM2; pshufb MM3, m64; pshufb XMM3, m128; pabsb MM1, MM2; pabsb XMM1, XMM2; pabsb MM3, m64; pabsb XMM3, m128; pabsd MM1, MM2; pabsd XMM1, XMM2; pabsd MM3, m64; pabsd XMM3, m128; pabsw MM1, MM2; pabsw XMM1, XMM2; pabsw MM3, m64; pabsw XMM3, m128; psignb MM1, MM2; psignb XMM1, XMM2; psignb MM3, m64; psignb XMM3, m128; psignd MM1, MM2; psignd XMM1, XMM2; psignd MM3, m64; psignd XMM3, m128; psignw MM1, MM2; psignw XMM1, XMM2; psignw MM3, m64; psignw XMM3, m128; L1: pop RAX; mov p[RBP],RAX; } foreach (ref i, b; data) { //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b); assert(p[i] == b); } } 
/****************************************************/

/* ======================= SSE4.1 ======================= */

/**
 * Checks the machine code the inline assembler emits for SSE4.1 instructions.
 *
 * `call L1` pushes the address of the first instruction following the call
 * and transfers control straight to L1, so none of the listed instructions
 * is executed. At L1 that address is popped into RAX and stored in `p`; the
 * loop at the end then compares the bytes found at `p` against `data`, which
 * lists the expected encoding of every instruction in the same order as the
 * asm block.
 *
 * The displacement bytes in `data` (e.g. 0xD0, annotated [RBP-0x30], for
 * m128) encode where m8/m16/m32/m64/m128 live relative to RBP, so changing
 * the local declarations of this function would presumably require updating
 * `data` as well.
 */
void test58()
{
    ubyte* p;
    byte m8;
    short m16;
    int m32;
    M64 m64;
    M128 m128;
    // Expected encodings, one row per instruction of the asm block below.
    static ubyte[] data =
    [
        0x66, 0x0F, 0x3A, 0x0D, 0xCA, 3,// blendpd XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x0D, 0x5D, 0xD0, 3,// blendpd XMM3,XMMWORD PTR [RBP-0x30],0x3
        0x66, 0x0F, 0x3A, 0x0C, 0xCA, 3,// blendps XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x0C, 0x5D, 0xD0, 3,// blendps XMM3,XMMWORD PTR [RBP-0x30],0x3
        0x66, 0x0F, 0x38, 0x15, 0xCA, // blendvpd XMM1,XMM2,XMM0
        0x66, 0x0F, 0x38, 0x15, 0x5D, 0xD0, // blendvpd XMM3,XMMWORD PTR [RBP-0x30],XMM0
        0x66, 0x0F, 0x38, 0x14, 0xCA, // blendvps XMM1,XMM2,XMM0
        0x66, 0x0F, 0x38, 0x14, 0x5D, 0xD0, // blendvps XMM3,XMMWORD PTR [RBP-0x30],XMM0
        0x66, 0x0F, 0x3A, 0x41, 0xCA, 3,// dppd XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x41, 0x5D, 0xD0, 3,// dppd XMM3,XMMWORD PTR [RBP-0x30],0x3
        0x66, 0x0F, 0x3A, 0x40, 0xCA, 3,// dpps XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x40, 0x5D, 0xD0, 3,// dpps XMM3,XMMWORD PTR [RBP-0x30],0x3
        0x66, 0x0F, 0x3A, 0x17, 0xD2, 3,// extractps EDX,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x17, 0x55, 0xBC, 3,// extractps DWORD PTR [RBP-0x44],XMM2,0x3
        0x66, 0x0F, 0x3A, 0x21, 0xCA, 3,// insertps XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x21, 0x5D, 0xBC, 3,// insertps XMM3,DWORD PTR [RBP-0x44],0x3
        0x66, 0x0F, 0x38, 0x2A, 0x4D, 0xD0, // movntdqa XMM1,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x3A, 0x42, 0xCA, 3,// mpsadbw XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x42, 0x5D, 0xD0, 3,// mpsadbw XMM3,XMMWORD PTR [RBP-0x30],0x3
        0x66, 0x0F, 0x38, 0x2B, 0xCA, // packusdw XMM1,XMM2
        0x66, 0x0F, 0x38, 0x2B, 0x5D, 0xD0, // packusdw XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x10, 0xCA, // pblendvb XMM1,XMM2,XMM0
        0x66, 0x0F, 0x38, 0x10, 0x5D, 0xD0, // pblendvb XMM3,XMMWORD PTR [RBP-0x30],XMM0
        0x66, 0x0F, 0x3A, 0x0E, 0xCA, 3,// pblendw XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x0E, 0x5D, 0xD0, 3,// pblendw XMM3,XMMWORD PTR [RBP-0x30],0x3
        0x66, 0x0F, 0x38, 0x29, 0xCA, // pcmpeqq XMM1,XMM2
        0x66, 0x0F, 0x38, 0x29, 0x5D, 0xD0, // pcmpeqq XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x3A, 0x14, 0xD0, 3,// pextrb EAX,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x14, 0xD3, 3,// pextrb EBX,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x14, 0xD1, 3,// pextrb ECX,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x14, 0xD2, 3,// pextrb EDX,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x14, 0xD0, 3,// pextrb EAX,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x14, 0xD3, 3,// pextrb EBX,XMM2,0x3
        0x66, 0x41, 0x0F, 0x3A, 0x14, 0xD0, 3,// pextrb R8D,XMM2,0x3
        0x66, 0x41, 0x0F, 0x3A, 0x14, 0xD2, 3,// pextrb R10D,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x14, 0x5D, 0xB8, 3,// pextrb BYTE PTR [RBP-0x48],XMM3,0x3
        0x66, 0x0F, 0x3A, 0x16, 0xD0, 3,// pextrd EAX,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x16, 0xD3, 3,// pextrd EBX,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x16, 0xD1, 3,// pextrd ECX,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x16, 0xD2, 3,// pextrd EDX,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x16, 0x5D, 0xBC, 3,// pextrd DWORD PTR [RBP-0x44],XMM3,0x3
        0x66, 0x48, 0x0F, 0x3A, 0x16, 0xD0, 3,// pextrq RAX,XMM2,0x3
        0x66, 0x48, 0x0F, 0x3A, 0x16, 0xD3, 3,// pextrq RBX,XMM2,0x3
        0x66, 0x48, 0x0F, 0x3A, 0x16, 0xD1, 3,// pextrq RCX,XMM2,0x3
        0x66, 0x48, 0x0F, 0x3A, 0x16, 0xD2, 3,// pextrq RDX,XMM2,0x3
        0x66, 0x48, 0x0F, 0x3A, 0x16, 0x5D, 0xC0, 3,// pextrq QWORD PTR [RBP-0x40],XMM3,0x3
        0x66, 0x0F, 0xC5, 0xC2, 3,// pextrw EAX,XMM2,0x3
        0x66, 0x0F, 0xC5, 0xDA, 3,// pextrw EBX,XMM2,0x3
        0x66, 0x0F, 0xC5, 0xCA, 3,// pextrw ECX,XMM2,0x3
        0x66, 0x0F, 0xC5, 0xD2, 3,// pextrw EDX,XMM2,0x3
        0x66, 0x0F, 0xC5, 0xC2, 3,// pextrw EAX,XMM2,0x3
        0x66, 0x0F, 0xC5, 0xDA, 3,// pextrw EBX,XMM2,0x3
        0x66, 0x44, 0x0F, 0xC5, 0xC2, 3,// pextrw R8D,XMM2,0x3
        0x66, 0x44, 0x0F, 0xC5, 0xD2, 3,// pextrw R10D,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x15, 0x5D, 0xBA, 3,// pextrw WORD PTR [RBP-0x46],XMM3,0x3
        0x66, 0x0F, 0x38, 0x41, 0xCA, // phminposuw XMM1,XMM2
        0x66, 0x0F, 0x38, 0x41, 0x5D, 0xD0, // phminposuw XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x3A, 0x20, 0xC8, 3,// pinsrb XMM1,EAX,0x3
        0x66, 0x0F, 0x3A, 0x20, 0xCB, 3,// pinsrb XMM1,EBX,0x3
        0x66, 0x0F, 0x3A, 0x20, 0xC9, 3,// pinsrb XMM1,ECX,0x3
        0x66, 0x0F, 0x3A, 0x20, 0xCA, 3,// pinsrb XMM1,EDX,0x3
        0x66, 0x0F, 0x3A, 0x20, 0x5D, 0xB8, 3,// pinsrb XMM3,BYTE PTR [RBP-0x48],0x3
        0x66, 0x0F, 0x3A, 0x22, 0xC8, 3,// pinsrd XMM1,EAX,0x3
        0x66, 0x0F, 0x3A, 0x22, 0xCB, 3,// pinsrd XMM1,EBX,0x3
        0x66, 0x0F, 0x3A, 0x22, 0xC9, 3,// pinsrd XMM1,ECX,0x3
        0x66, 0x0F, 0x3A, 0x22, 0xCA, 3,// pinsrd XMM1,EDX,0x3
        0x66, 0x0F, 0x3A, 0x22, 0x5D, 0xBC, 3,// pinsrd XMM3,DWORD PTR [RBP-0x44],0x3
        0x66, 0x48, 0x0F, 0x3A, 0x22, 0xC8, 3,// pinsrq XMM1,RAX,0x3
        0x66, 0x48, 0x0F, 0x3A, 0x22, 0xCB, 3,// pinsrq XMM1,RBX,0x3
        0x66, 0x48, 0x0F, 0x3A, 0x22, 0xC9, 3,// pinsrq XMM1,RCX,0x3
        0x66, 0x48, 0x0F, 0x3A, 0x22, 0xCA, 3,// pinsrq XMM1,RDX,0x3
        0x66, 0x48, 0x0F, 0x3A, 0x22, 0x5D, 0xC0, 3,// pinsrq XMM3,QWORD PTR [RBP-0x40],0x3
        0x66, 0x0F, 0x38, 0x3C, 0xCA, // pmaxsb XMM1,XMM2
        0x66, 0x0F, 0x38, 0x3C, 0x5D, 0xD0, // pmaxsb XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x3D, 0xCA, // pmaxsd XMM1,XMM2
        0x66, 0x0F, 0x38, 0x3D, 0x5D, 0xD0, // pmaxsd XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x3F, 0xCA, // pmaxud XMM1,XMM2
        0x66, 0x0F, 0x38, 0x3F, 0x5D, 0xD0, // pmaxud XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x3E, 0xCA, // pmaxuw XMM1,XMM2
        0x66, 0x0F, 0x38, 0x3E, 0x5D, 0xD0, // pmaxuw XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x38, 0xCA, // pminsb XMM1,XMM2
        0x66, 0x0F, 0x38, 0x38, 0x5D, 0xD0, // pminsb XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x39, 0xCA, // pminsd XMM1,XMM2
        0x66, 0x0F, 0x38, 0x39, 0x5D, 0xD0, // pminsd XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x3B, 0xCA, // pminud XMM1,XMM2
        0x66, 0x0F, 0x38, 0x3B, 0x5D, 0xD0, // pminud XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x3A, 0xCA, // pminuw XMM1,XMM2
        0x66, 0x0F, 0x38, 0x3A, 0x5D, 0xD0, // pminuw XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x20, 0xCA, // pmovsxbw XMM1,XMM2
        0x66, 0x0F, 0x38, 0x20, 0x5D, 0xC0, // pmovsxbw XMM3,QWORD PTR [RBP-0x40]
        0x66, 0x0F, 0x38, 0x21, 0xCA, // pmovsxbd XMM1,XMM2
        0x66, 0x0F, 0x38, 0x21, 0x5D, 0xBC, // pmovsxbd XMM3,DWORD PTR [RBP-0x44]
        0x66, 0x0F, 0x38, 0x22, 0xCA, // pmovsxbq XMM1,XMM2
        0x66, 0x0F, 0x38, 0x22, 0x5D, 0xBA, // pmovsxbq XMM3,WORD PTR [RBP-0x46]
        0x66, 0x0F, 0x38, 0x23, 0xCA, // pmovsxwd XMM1,XMM2
        0x66, 0x0F, 0x38, 0x23, 0x5D, 0xC0, // pmovsxwd XMM3,QWORD PTR [RBP-0x40]
        0x66, 0x0F, 0x38, 0x24, 0xCA, // pmovsxwq XMM1,XMM2
        0x66, 0x0F, 0x38, 0x24, 0x5D, 0xBC, // pmovsxwq XMM3,DWORD PTR [RBP-0x44]
        0x66, 0x0F, 0x38, 0x25, 0xCA, // pmovsxdq XMM1,XMM2
        0x66, 0x0F, 0x38, 0x25, 0x5D, 0xC0, // pmovsxdq XMM3,QWORD PTR [RBP-0x40]
        0x66, 0x0F, 0x38, 0x30, 0xCA, // pmovzxbw XMM1,XMM2
        0x66, 0x0F, 0x38, 0x30, 0x5D, 0xC0, // pmovzxbw XMM3,QWORD PTR [RBP-0x40]
        0x66, 0x0F, 0x38, 0x31, 0xCA, // pmovzxbd XMM1,XMM2
        0x66, 0x0F, 0x38, 0x31, 0x5D, 0xBC, // pmovzxbd XMM3,DWORD PTR [RBP-0x44]
        0x66, 0x0F, 0x38, 0x32, 0xCA, // pmovzxbq XMM1,XMM2
        0x66, 0x0F, 0x38, 0x32, 0x5D, 0xBA, // pmovzxbq XMM3,WORD PTR [RBP-0x46]
        0x66, 0x0F, 0x38, 0x33, 0xCA, // pmovzxwd XMM1,XMM2
        0x66, 0x0F, 0x38, 0x33, 0x5D, 0xC0, // pmovzxwd XMM3,QWORD PTR [RBP-0x40]
        0x66, 0x0F, 0x38, 0x34, 0xCA, // pmovzxwq XMM1,XMM2
        0x66, 0x0F, 0x38, 0x34, 0x5D, 0xBC, // pmovzxwq XMM3,DWORD PTR [RBP-0x44]
        0x66, 0x0F, 0x38, 0x35, 0xCA, // pmovzxdq XMM1,XMM2
        0x66, 0x0F, 0x38, 0x35, 0x5D, 0xC0, // pmovzxdq XMM3,QWORD PTR [RBP-0x40]
        0x66, 0x0F, 0x38, 0x28, 0xCA, // pmuldq XMM1,XMM2
        0x66, 0x0F, 0x38, 0x28, 0x5D, 0xD0, // pmuldq XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x40, 0xCA, // pmulld XMM1,XMM2
        0x66, 0x0F, 0x38, 0x40, 0x5D, 0xD0, // pmulld XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x38, 0x17, 0xCA, // ptest XMM1,XMM2
        0x66, 0x0F, 0x38, 0x17, 0x5D, 0xD0, // ptest XMM3,XMMWORD PTR [RBP-0x30]
        0x66, 0x0F, 0x3A, 0x09, 0xCA, 3,// roundpd XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x09, 0x5D, 0xD0, 3,// roundpd XMM3,XMMWORD PTR [RBP-0x30],0x3
        0x66, 0x0F, 0x3A, 0x08, 0xCA, 3,// roundps XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x08, 0x5D, 0xD0, 3,// roundps XMM3,XMMWORD PTR [RBP-0x30],0x3
        0x66, 0x0F, 0x3A, 0x0B, 0xCA, 3,// roundsd XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x0B, 0x5D, 0xC0, 3,// roundsd XMM3,QWORD PTR [RBP-0x40],0x3
        0x66, 0x0F, 0x3A, 0x0A, 0xCA, 3,// roundss XMM1,XMM2,0x3
        0x66, 0x0F, 0x3A, 0x0A, 0x4D, 0xBC, 3,// roundss xmm1,dword ptr [rbp-0x44],0x3
    ];

    asm
    {
        // Jump over the instruction list; the pushed return address marks its start.
        call L1;

        blendpd XMM1, XMM2, 3;
        blendpd XMM3, m128, 3;

        blendps XMM1, XMM2, 3;
        blendps XMM3, m128, 3;

        blendvpd XMM1, XMM2, XMM0;
        blendvpd XMM3, m128, XMM0;

        blendvps XMM1, XMM2, XMM0;
        blendvps XMM3, m128, XMM0;

        dppd XMM1, XMM2, 3;
        dppd XMM3, m128, 3;

        dpps XMM1, XMM2, 3;
        dpps XMM3, m128, 3;

        extractps EDX, XMM2, 3;
        extractps m32, XMM2, 3;

        insertps XMM1, XMM2, 3;
        insertps XMM3, m32, 3;

        movntdqa XMM1, m128;

        mpsadbw XMM1, XMM2, 3;
        mpsadbw XMM3, m128, 3;

        packusdw XMM1, XMM2;
        packusdw XMM3, m128;

        pblendvb XMM1, XMM2, XMM0;
        pblendvb XMM3, m128, XMM0;

        pblendw XMM1, XMM2, 3;
        pblendw XMM3, m128, 3;

        pcmpeqq XMM1, XMM2;
        pcmpeqq XMM3, m128;

        // The 64-bit register spellings below are expected to produce the same
        // bytes as the 32-bit ones (see the repeated EAX/EBX rows in data).
        pextrb EAX, XMM2, 3;
        pextrb EBX, XMM2, 3;
        pextrb ECX, XMM2, 3;
        pextrb EDX, XMM2, 3;
        pextrb RAX, XMM2, 3;
        pextrb RBX, XMM2, 3;
        pextrb R8 , XMM2, 3;
        pextrb R10, XMM2, 3;
        pextrb m8, XMM3, 3;

        pextrd EAX, XMM2, 3;
        pextrd EBX, XMM2, 3;
        pextrd ECX, XMM2, 3;
        pextrd EDX, XMM2, 3;
        pextrd m32, XMM3, 3;

        pextrq RAX, XMM2, 3;
        pextrq RBX, XMM2, 3;
        pextrq RCX, XMM2, 3;
        pextrq RDX, XMM2, 3;
        pextrq m64, XMM3, 3;

        // As with pextrb, RAX/RBX/R8/R10 are expected to encode like the
        // 32-bit register forms listed in data.
        pextrw EAX, XMM2, 3;
        pextrw EBX, XMM2, 3;
        pextrw ECX, XMM2, 3;
        pextrw EDX, XMM2, 3;
        pextrw RAX, XMM2, 3;
        pextrw RBX, XMM2, 3;
        pextrw R8 , XMM2, 3;
        pextrw R10, XMM2, 3;
        pextrw m16, XMM3, 3;

        phminposuw XMM1, XMM2;
        phminposuw XMM3, m128;

        pinsrb XMM1, EAX, 3;
        pinsrb XMM1, EBX, 3;
        pinsrb XMM1, ECX, 3;
        pinsrb XMM1, EDX, 3;
        pinsrb XMM3, m8, 3;

        pinsrd XMM1, EAX, 3;
        pinsrd XMM1, EBX, 3;
        pinsrd XMM1, ECX, 3;
        pinsrd XMM1, EDX, 3;
        pinsrd XMM3, m32, 3;

        pinsrq XMM1, RAX, 3;
        pinsrq XMM1, RBX, 3;
        pinsrq XMM1, RCX, 3;
        pinsrq XMM1, RDX, 3;
        pinsrq XMM3, m64, 3;

        pmaxsb XMM1, XMM2;
        pmaxsb XMM3, m128;

        pmaxsd XMM1, XMM2;
        pmaxsd XMM3, m128;

        pmaxud XMM1, XMM2;
        pmaxud XMM3, m128;

        pmaxuw XMM1, XMM2;
        pmaxuw XMM3, m128;

        pminsb XMM1, XMM2;
        pminsb XMM3, m128;

        pminsd XMM1, XMM2;
        pminsd XMM3, m128;

        pminud XMM1, XMM2;
        pminud XMM3, m128;

        pminuw XMM1, XMM2;
        pminuw XMM3, m128;

        pmovsxbw XMM1, XMM2;
        pmovsxbw XMM3, m64;

        pmovsxbd XMM1, XMM2;
        pmovsxbd XMM3, m32;

        pmovsxbq XMM1, XMM2;
        pmovsxbq XMM3, m16;

        pmovsxwd XMM1, XMM2;
        pmovsxwd XMM3, m64;

        pmovsxwq XMM1, XMM2;
        pmovsxwq XMM3, m32;

        pmovsxdq XMM1, XMM2;
        pmovsxdq XMM3, m64;

        pmovzxbw XMM1, XMM2;
        pmovzxbw XMM3, m64;

        pmovzxbd XMM1, XMM2;
        pmovzxbd XMM3, m32;

        pmovzxbq XMM1, XMM2;
        pmovzxbq XMM3, m16;

        pmovzxwd XMM1, XMM2;
        pmovzxwd XMM3, m64;

        pmovzxwq XMM1, XMM2;
        pmovzxwq XMM3, m32;

        pmovzxdq XMM1, XMM2;
        pmovzxdq XMM3, m64;

        pmuldq XMM1, XMM2;
        pmuldq XMM3, m128;

        pmulld XMM1, XMM2;
        pmulld XMM3, m128;

        ptest XMM1, XMM2;
        ptest XMM3, m128;

        roundpd XMM1, XMM2, 3;
        roundpd XMM3, m128, 3;

        roundps XMM1, XMM2, 3;
        roundps XMM3, m128, 3;

        roundsd XMM1, XMM2, 3;
        roundsd XMM3, m64, 3;

        roundss XMM1, XMM2, 3;
        roundss XMM1, m32, 3;

        // Recover the address pushed by `call L1`: the start of the emitted code.
L1:     pop RAX;
        mov p[RBP],RAX;
    }

    // Compare the emitted bytes with the expected table, byte by byte.
    foreach (ref i, b; data)
    {
        // Debugging aid: enable to print each emitted/expected byte pair.
        //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b);
        assert(p[i] == b);
    }
}

/****************************************************/

/* ======================= SSE4.2 ======================= */

/**
 * Checks the machine code the inline assembler emits for the instructions
 * grouped under the SSE4.2 heading: crc32, pcmpestri/pcmpestrm,
 * pcmpistri/pcmpistrm, pcmpgtq and popcnt.
 *
 * Same technique as the other encoding tests in this file: `call L1` skips
 * the instruction list (nothing in it is executed), the pushed address is
 * popped at L1 and stored in `p`, and the bytes at `p` are compared against
 * `data`.
 *
 * The displacement bytes in `data` (0xB8, 0xBA, 0xBC, 0xC0, 0xD0) encode the
 * RBP-relative positions of m8, m16, m32, m64 and m128, so the local
 * declarations and `data` presumably have to be kept in sync.
 */
void test59()
{
    ubyte* p;
    byte m8;
    short m16;
    int m32;
    M64 m64;
    M128 m128;
    // Expected encodings, one row per instruction of the asm block below.
    static ubyte[] data =
    [
        0xF2, 0x0F, 0x38, 0xF0, 0xC1, // crc32 EAX, CL
        0x66, 0xF2, 0x0F, 0x38, 0xF1, 0xC1, // crc32 EAX, CX
        0xF2, 0x0F, 0x38, 0xF1, 0xC1, // crc32 EAX, ECX
        0xF2, 0x48, 0x0F, 0x38, 0xF0, 0xC1, // crc32 RAX, CL
        0xF2, 0x48, 0x0F, 0x38, 0xF1, 0xC1, // crc32 RAX, RCX

        0xF2, 0x0F, 0x38, 0xF0, 0x55, 0xB8, // crc32 EDX, byte ptr [RBP-0x48]
        0x66, 0xF2, 0x0F, 0x38, 0xF1, 0x55, 0xBA, // crc32 EDX, word ptr [RBP-0x46]
        0xF2, 0x0F, 0x38, 0xF1, 0x55, 0xBC, // crc32 EDX,dword ptr [RBP-0x44]
        0xF2, 0x48, 0x0F, 0x38, 0xF0, 0x55, 0xB8, // crc32 RDX, byte ptr [RBP-0x48]
        0xF2, 0x48, 0x0F, 0x38, 0xF1, 0x55, 0xC0, // crc32 RDX,qword ptr [RBP-0x40]

        0x66, 0x0F, 0x3A, 0x61, 0xCA, 2, // pcmpestri XMM1,XMM2, 2
        0x66, 0x0F, 0x3A, 0x61, 0x5D, 0xD0, 2, // pcmpestri XMM3,xmmword ptr [RBP-0x30], 2

        0x66, 0x0F, 0x3A, 0x60, 0xCA, 2, // pcmpestrm XMM1,XMM2, 2
        0x66, 0x0F, 0x3A, 0x60, 0x5D, 0xD0, 2, // pcmpestrm XMM3,xmmword ptr [RBP-0x30], 2

        0x66, 0x0F, 0x3A, 0x63, 0xCA, 2, // pcmpistri XMM1,XMM2, 2
        0x66, 0x0F, 0x3A, 0x63, 0x5D, 0xD0, 2, // pcmpistri XMM3,xmmword ptr [RBP-0x30], 2

        0x66, 0x0F, 0x3A, 0x62, 0xCA, 2, // pcmpistrm XMM1,XMM2, 2
        0x66, 0x0F, 0x3A, 0x62, 0x5D, 0xD0, 2, // pcmpistrm XMM3,xmmword ptr [RBP-0x30], 2

        0x66, 0x0F, 0x38, 0x37, 0xCA, // pcmpgtq XMM1,XMM2
        0x66, 0x0F, 0x38, 0x37, 0x5D, 0xD0, // pcmpgtq XMM3,xmmword ptr [RBP-0x30]

        0x66, 0xF3, 0x0F, 0xB8, 0xC1, // popcnt AX, CX
        0xF3, 0x0F, 0xB8, 0xC1, // popcnt EAX, ECX
        0xF3, 0x48, 0x0F, 0xB8, 0xC1, // popcnt RAX, RCX
        0x66, 0xF3, 0x0F, 0xB8, 0x55, 0xBA, // popcnt DX, word ptr [RBP-0x46]
        0xF3, 0x0F, 0xB8, 0x55, 0xBC, // popcnt EDX,dword ptr [RBP-0x44]
        0xF3, 0x48, 0x0F, 0xB8, 0x55, 0xC0 // popcnt RDX,qword ptr [RBP-0x40]
    ];

    asm
    {
        // Jump over the instruction list; the pushed return address marks its start.
        call L1;

        crc32 EAX, CL;
        crc32 EAX, CX;
        crc32 EAX, ECX;
        crc32 RAX, CL;
        crc32 RAX, RCX;

        crc32 EDX, m8;
        crc32 EDX, m16;
        crc32 EDX, m32;
        crc32 RDX, m8;
        crc32 RDX, m64;

        pcmpestri XMM1, XMM2, 2;
        pcmpestri XMM3, m128, 2;

        pcmpestrm XMM1, XMM2, 2;
        pcmpestrm XMM3, m128, 2;

        pcmpistri XMM1, XMM2, 2;
        pcmpistri XMM3, m128, 2;

        pcmpistrm XMM1, XMM2, 2;
        pcmpistrm XMM3, m128, 2;

        pcmpgtq XMM1, XMM2;
        pcmpgtq XMM3, m128;

        popcnt AX, CX;
        popcnt EAX, ECX;
        popcnt RAX, RCX;
        popcnt DX, m16;
        popcnt EDX, m32;
        popcnt RDX, m64;

        // Recover the address pushed by `call L1`: the start of the emitted code.
L1:     pop RAX;
        mov p[RBP],RAX;
    }

    // Compare the emitted bytes with the expected table, byte by byte.
    foreach (ref i, b; data)
    {
        // Debugging aid: enable to print each emitted/expected byte pair.
        //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b);
        assert(p[i] == b);
    }
}

/**
 * Checks the encodings of `mov` and `movups` when R8 is the base register
 * of the memory operand, combined with a low register (RAX/XMM0) and a
 * high register (R8/XMM8) as the other operand.
 *
 * The first byte of each expected row is the REX prefix: 0x49 = REX.W+B,
 * 0x4D = REX.W+R+B, 0x41 = REX.B, 0x45 = REX.R+B.
 *
 * As in the other encoding tests, `call L1` skips the listed instructions,
 * the pushed address is popped into `p`, and the bytes at `p` are compared
 * against `data`.
 */
void test60()
{
    ubyte *p;
    // Expected encodings, one row per instruction of the asm block below.
    static ubyte[] data =
    [
        0x49, 0x8B, 0x00, // mov RAX, [R8]
        0x4D, 0x8B, 0x00, // mov R8, [R8]
        0x49, 0x89, 0x00, // mov [R8], RAX
        0x4D, 0x89, 0x00, // mov [R8], R8
        0x41, 0x0F, 0x10, 0x00, // movups XMM0, [R8]
        0x45, 0x0F, 0x10, 0x00, // movups XMM8, [R8]
    ];

    asm
    {
        // Jump over the instruction list; the pushed return address marks its start.
        call L1;

        mov RAX, [R8];
        mov R8, [R8];
        mov [R8], RAX;
        mov [R8], R8;
        movups XMM0, [R8];
        movups XMM8, [R8];

        // Recover the address pushed by `call L1`: the start of the emitted code.
L1:     pop RAX;
        mov p[RBP], RAX;
    }

    // Compare the emitted bytes with the expected table, byte by byte.
    foreach (ref i, b; data)
    {
        assert(p[i] == b);
    }
}

/* ======================= AVX ======================= */

void test61()
{
    ubyte* p;
    static ubyte[] data =
    [
        0x0F, 0x01, 0xD0, // xgetbv
        0x0F, 0x01, 0xD1, // xsetbv
        0x0F, 0xAE, 0x28, // xrstor [RAX]
        0x48, 0x0F, 0xAE, 0x28, // xrstor64 [RAX]
        0x0F, 0xAE, 0x20, // xsave [RAX]
        0x48, 0x0F, 0xAE, 0x20, // xsave64 [RAX]
        0x0F, 0xC7, 0x21, // xsavec [RCX]
        0x48, 0x0F, 0xC7, 0x21, // xsavec64 [RCX]
        0x0F, 0xAE, 0x30, // xsaveopt [RAX]
        0x48, 0x0F, 0xAE, 0x30, // xsaveopt64 [RAX]
        0xC5, 0xF8, 0xAE, 0x10, // vldmxcsr [RAX]
        0xC5, 0xF8, 0xAE, 0x18, // vstmxcsr [RAX]
        0xC5, 0xF2, 0x58, 0xC2, // vaddss XMM0, XMM1, XMM2;
        0xC5, 0x83, 0x58, 0x00, // vaddsd XMM0, XMM15, [RAX];
        0xC5, 0x78, 0x58, 0xE0, // vaddps XMM12, XMM0, XMM0;
        0xC4, 0x41, 0x39, 0x58, 0xC0, // vaddpd XMM8, XMM8, XMM8;
        0xC5, 0xF2, 0x5C, 0xC2, // vsubss XMM0, XMM1, XMM2;
        0xC5, 0x83, 0x5C, 0x00, // vsubsd XMM0, XMM1, [RAX];
        0xC5, 0x78, 0x5C, 0xE0, // vsubps XMM12, XMM0, XMM0;
        0xC4, 0x41, 0x39, 0x5C, 0xC0, // vsubpd XMM8, XMM8, XMM8;
        0xC5, 0xF3, 0xD0, 0xC2, // vaddsubps XMM0, XMM1, XMM2;
        0xC5, 0xF7, 0xD0, 0xC2, // vaddsubps YMM0, YMM1, YMM2;
        0xC5, 0x75, 0xD0, 0xC2, // vaddsubpd YMM8, YMM1, YMM2;
        0xC5, 0x05, 0xD0, 0x78, 0x40, // vaddsubpd YMM15, YMM15, 64[RAX];
        0xC4, 0xE3, 0x7D, 0x40, 0xC0, 0x00, // vdpps YMM0, YMM0, YMM0, 0
        0xC4, 0xE3, 0x79, 0x41, 0xC0, 0x88, // vdppd XMM0, XMM0, XMM0, 0x88
        0xC5, 0xBD, 0x7C, 0x07, // vhaddpd YMM0, YMM8, [RDI];
        0xC5, 0xBB, 0x7C, 0xC1, // vhaddps XMM0, XMM8, XMM1;
        0xC5, 0xFD, 0x5F, 0xC1, // vmaxpd YMM0, YMM0, YMM1;
        0xC5, 0xF9, 0x5F, 0x00, // vmaxpd XMM0, XMM0, [RAX];
        0xC5, 0xFC, 0x5F, 0xC1, // vmaxps YMM0, YMM0, YMM1;
        0xC5, 0xF8, 0x5F, 0x00, // vmaxps XMM0, XMM0, [RAX];
        0xC5, 0xFB, 0x5F, 0x00, // vmaxsd XMM0, XMM0, [RAX];
        0xC5, 0xFA, 0x5F, 0x00, // vmaxss XMM0, XMM0, [RAX];
        0xC5, 0xFD, 0x5D, 0xC1, // vminpd YMM0, YMM0, YMM1;
        0xC5, 0xF9, 0x5D, 0x00, // vminpd XMM0, XMM0, [RAX];
        0xC5, 0xFC, 0x5D, 0xC1, // vminps YMM0, YMM0, YMM1;
        0xC5, 0xF8, 0x5D, 0x00, // vminps XMM0, XMM0,
[RAX]; 0xC5, 0xFB, 0x5D, 0x00, // vminsd XMM0, XMM0, [RAX]; 0xC5, 0xFA, 0x5D, 0x00, // vminss XMM0, XMM0, [RAX]; 0xC5, 0xF9, 0x50, 0xC0, // vmovmskpd EAX, XMM0; 0xC5, 0xFD, 0x50, 0xF8, // vmovmskpd EDI, YMM0; 0xC4, 0xC1, 0x7C, 0x50, 0xC7, // vmovmskps EAX, YMM15; 0xC5, 0x7C, 0x50, 0xC0, // vmovmskps R8D, YMM0; 0xC5, 0xF9, 0xD7, 0xC0, // vpmovmskb EAX, XMM0; 0xC4, 0xE3, 0x71, 0x42, 0xC2, 0x00, // vmpsadbw XMM0, XMM1, XMM2, 0x00; 0xC4, 0x43, 0x31, 0x42, 0xC2, 0xFF, // vmpsadbw XMM8, XMM9, XMM10, 0xFF; 0xC4, 0xE2, 0x79, 0x1C, 0x00, // vpabsb XMM0, [RAX]; 0xC4, 0xC2, 0x79, 0x1D, 0xCF, // vpabsw XMM1, XMM15; 0xC4, 0xE2, 0x79, 0x1E, 0x0B, // vpabsd XMM1, [RBX]; 0xC5, 0xF9, 0xFC, 0x00, // vpaddb XMM0, XMM0, [RAX]; 0xC4, 0x41, 0x39, 0xFD, 0xC7, // vpaddw XMM8, XMM8, XMM15; 0xC5, 0x39, 0xFE, 0x03, // vpaddd XMM8, XMM8, [RBX]; 0xC5, 0xF9, 0xD4, 0xC0, // vpaddq XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xF8, 0x00, // vpsubb XMM0, XMM0, [RAX]; 0xC4, 0x41, 0x39, 0xF9, 0xC7, // vpsubw XMM8, XMM8, XMM15; 0xC5, 0x39, 0xFA, 0x03, // vpsubd XMM8, XMM8, [RBX]; 0xC5, 0xF9, 0xFB, 0xC0, // vpsubq XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xEC, 0xC0, // vpaddsb XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xED, 0xC0, // vpaddsw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xDC, 0xC0, // vpaddusb XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xDD, 0xC0, // vpaddusw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xE8, 0xC0, // vpsubsb XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xE9, 0xC0, // vpsubsw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xD8, 0xC0, // vpsubusb XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xD9, 0xC0, // vpsubusw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xE0, 0xC0, // vpavgb XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xE3, 0xC0, // vpavgw XMM0, XMM0, XMM0; 0x66, 0x0F, 0x3A, 0x44, 0x44, 0x88, 0x40, 0x00, // pclmulqdq XMM0, 64[RAX + 4 * RCX], 0; 0xC4, 0xE3, 0x79, 0x44, 0x44, 0x88, 0x40, 0x00, // vpclmulqdq XMM0, XMM0, 64[RAX + 4 * RCX], 0; 0xC4, 0xE2, 0x79, 0x01, 0xC0, // vphaddw XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x02, 0xC0, // vphaddd XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x05, 0xC0, // vphsubw XMM0, XMM0, XMM0; 0xC4, 
0xE2, 0x79, 0x06, 0xC0, // vphsubd XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x03, 0xC0, // vphaddsw XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x07, 0xC0, // vphsubsw XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x41, 0xC0, // vphminposuw XMM0, XMM0; 0xC5, 0xF9, 0xF5, 0xC0, // vpmaddwd XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x04, 0xC0, // vpmaddubsw XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x3C, 0xC0, // vpmaxsb XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x3D, 0xC0, // vpmaxsd XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xEE, 0xC0, // vpmaxsw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xDE, 0xC0, // vpmaxub XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x3F, 0xC0, // vpmaxud XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x3E, 0xC0, // vpmaxuw XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x38, 0xC0, // vpminsb XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x39, 0xC0, // vpminsd XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xEA, 0xC0, // vpminsw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xDA, 0xC0, // vpminub XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x3B, 0xC0, // vpminud XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x3A, 0xC0, // vpminuw XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x0B, 0xC0, // vpmulhrsw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xE4, 0xC0, // vpmulhuw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xE5, 0xC0, // vpmulhw XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x40, 0xC0, // vpmulld XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xD5, 0xC0, // vpmullw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xF4, 0xC0, // vpmuludq XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x28, 0xC0, // vpmuldq XMM0, XMM0, XMM0; 0xC5, 0xF9, 0xF6, 0xC0, // vpsadbw XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x08, 0xC0, // vpsignb XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x09, 0xC0, // vpsignw XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x0A, 0xC0, // vpsignd XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x73, 0xF8, 0x00, // vpslldq XMM0, XMM0, 0; 0xC5, 0xF9, 0x71, 0xF0, 0x00, // vpsllw XMM0, XMM0, 0; 0xC5, 0xF9, 0xF1, 0xC0, // vpsllw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x72, 0xF0, 0x00, // vpslld XMM0, XMM0, 0; 0xC5, 0xF9, 0xF2, 0xC0, // vpslld XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x73, 0xF0, 0x00, // vpsllq XMM0, XMM0, 0; 0xC5, 0xF9, 0xF3, 
0xC0, // vpsllq XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x71, 0xE0, 0x00, // vpsraw XMM0, XMM0, 0; 0xC5, 0xF9, 0xE1, 0xC0, // vpsraw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x72, 0xE0, 0x00, // vpsrad XMM0, XMM0, 0; 0xC5, 0xF9, 0xE2, 0xC0, // vpsrad XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x73, 0xD8, 0x00, // vpsrldq XMM0, XMM0, 0; 0xC5, 0xF9, 0x71, 0xD0, 0x00, // vpsrlw XMM0, XMM0, 0; 0xC5, 0xF9, 0xD1, 0xC0, // vpsrlw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x72, 0xD0, 0x00, // vpsrld XMM0, XMM0, 0; 0xC5, 0xF9, 0xD2, 0xC0, // vpsrld XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x73, 0xD0, 0x00, // vpsrlq XMM0, XMM0, 0; 0xC5, 0xF9, 0xD3, 0xC0, // vpsrlq XMM0, XMM0, XMM0; 0xC5, 0xF8, 0x53, 0xC1, // vrcpps XMM0, XMM1; 0xC5, 0xFC, 0x53, 0xC1, // vrcpps YMM0, YMM1; 0xC5, 0xFA, 0x53, 0xC1, // vrcpss XMM0, XMM0, XMM1; 0xC4, 0xE3, 0x79, 0x09, 0xC0, 0x00, // vroundpd XMM0, XMM0, 0; 0xC4, 0xE3, 0x7D, 0x09, 0xC0, 0x00, // vroundpd YMM0, YMM0, 0; 0xC4, 0xE3, 0x79, 0x08, 0xC0, 0x00, // vroundps XMM0, XMM0, 0; 0xC4, 0xE3, 0x7D, 0x08, 0xC0, 0x00, // vroundps YMM0, YMM0, 0; 0xC4, 0xE3, 0x79, 0x0B, 0xC0, 0x00, // vroundsd XMM0, XMM0, XMM0, 0; 0xC4, 0xE3, 0x79, 0x0A, 0xC0, 0x00, // vroundss XMM0, XMM0, XMM0, 0; 0xC5, 0xF9, 0x51, 0xC0, // vsqrtpd XMM0, XMM0; 0xC5, 0xFD, 0x51, 0xC0, // vsqrtpd YMM0, YMM0; 0xC5, 0xF8, 0x51, 0xC0, // vsqrtps XMM0, XMM0; 0xC5, 0xFC, 0x51, 0xC0, // vsqrtps YMM0, YMM0; 0xC5, 0xFB, 0x51, 0xC0, // vsqrtsd XMM0, XMM0, XMM0; 0xC5, 0xFA, 0x51, 0xC0, // vsqrtss XMM0, XMM0, XMM0; 0xC5, 0xFC, 0x77, // vzeroall 0xC5, 0xF8, 0x77, // vzeroupper 0xC5, 0xF9, 0xC2, 0xC0, 0x00, // vcmppd XMM0, XMM0, XMM0, 0; 0xC5, 0xFD, 0xC2, 0xC0, 0x00, // vcmppd YMM0, YMM0, YMM0, 0; 0xC5, 0xF8, 0xC2, 0xC0, 0x00, // vcmpps XMM0, XMM0, XMM0, 0; 0xC5, 0xFC, 0xC2, 0xC0, 0x00, // vcmpps YMM0, YMM0, YMM0, 0; 0xC5, 0xFB, 0xC2, 0xC0, 0x00, // vcmpsd XMM0, XMM0, XMM0, 0; 0xC5, 0xFA, 0xC2, 0xC0, 0x00, // vcmpss XMM0, XMM0, XMM0, 0; 0xC5, 0xF9, 0x2F, 0xC0, // vcomisd XMM0, XMM0; 0xC5, 0xF8, 0x2F, 0xC0, // vcomiss XMM0, XMM0; 0xC5, 0xF9, 0x74, 0xC0, 
// vpcmpeqb XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x75, 0xC0, // vpcmpeqw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x76, 0xC0, // vpcmpeqd XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x29, 0xC0, // vpcmpeqq XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x64, 0xC0, // vpcmpgtb XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x65, 0xC0, // vpcmpgtw XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x66, 0xC0, // vpcmpgtd XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x37, 0xC0, // vpcmpgtq XMM0, XMM0, XMM0; 0xC4, 0xE3, 0x79, 0x61, 0xC0, 0x00, // vpcmpestri XMM0, XMM0, 0; 0xC4, 0xE3, 0x79, 0x60, 0xC0, 0x00, // vpcmpestrm XMM0, XMM0, 0; 0xC4, 0xE3, 0x79, 0x63, 0xC0, 0x00, // vpcmpistri XMM0, XMM0, 0; 0xC4, 0xE3, 0x79, 0x62, 0xC0, 0x00, // vpcmpistrm XMM0, XMM0, 0; 0xC5, 0xFA, 0xE6, 0xC0, // vcvtdq2pd XMM0, XMM0; 0xC5, 0xFE, 0xE6, 0xC0, // vcvtdq2pd YMM0, XMM0; 0xC5, 0xFE, 0xE6, 0x00, // vcvtdq2pd YMM0, [RAX]; 0xC5, 0xF8, 0x5B, 0xC0, // vcvtdq2ps XMM0, XMM0; 0xC5, 0xFC, 0x5B, 0xC0, // vcvtdq2ps YMM0, YMM0; 0xC5, 0xFC, 0x5B, 0x00, // vcvtdq2ps YMM0, [RAX]; 0xC5, 0xFB, 0xE6, 0xC0, // vcvtpd2dq XMM0, XMM0; 0xC5, 0xFF, 0xE6, 0xC0, // vcvtpd2dq XMM0, YMM0; 0xC5, 0xFB, 0xE6, 0x00, // vcvtpd2dq XMM0, [RAX]; 0xC5, 0xF9, 0x5A, 0xC0, // vcvtpd2ps XMM0, XMM0; 0xC5, 0xFD, 0x5A, 0xC0, // vcvtpd2ps XMM0, YMM0; 0xC5, 0xF9, 0x5A, 0x00, // vcvtpd2ps XMM0, [RAX]; 0xC5, 0xF9, 0x5B, 0xC0, // vcvtps2dq XMM0, XMM0; 0xC5, 0xFD, 0x5B, 0xC0, // vcvtps2dq YMM0, YMM0; 0xC5, 0xFD, 0x5B, 0x00, // vcvtps2dq YMM0, [RAX]; 0xC5, 0xF8, 0x5A, 0xC0, // vcvtps2pd XMM0, XMM0; 0xC5, 0xFC, 0x5A, 0xC0, // vcvtps2pd YMM0, XMM0; 0xC5, 0xFC, 0x5A, 0x00, // vcvtps2pd YMM0, [RAX]; 0xC5, 0xFB, 0x2D, 0xC0, // vcvtsd2si EAX, XMM0; 0xC4, 0xE1, 0xFB, 0x2D, 0xC0, // vcvtsd2si RAX, XMM0; 0xC4, 0xE1, 0xFB, 0x2D, 0x00, // vcvtsd2si RAX, [RAX]; 0xC5, 0xFB, 0x5A, 0xC0, // vcvtsd2ss XMM0, XMM0, XMM0; 0xC5, 0xFB, 0x5A, 0x00, // vcvtsd2ss XMM0, XMM0, [RAX]; 0xC5, 0xFB, 0x2A, 0xC0, // vcvtsi2sd XMM0, XMM0, EAX; 0xC4, 0xE1, 0xFB, 0x2A, 0xC0, // vcvtsi2sd XMM0, XMM0, RAX; 0xC5, 0xFB, 0x2A, 0x00, // vcvtsi2sd XMM0, XMM0, 
[RAX]; 0xC5, 0xFA, 0x2A, 0xC0, // vcvtsi2ss XMM0, XMM0, EAX; 0xC4, 0xE1, 0xFA, 0x2A, 0xC0, // vcvtsi2ss XMM0, XMM0, RAX; 0xC5, 0xFA, 0x2A, 0x00, // vcvtsi2ss XMM0, XMM0, [RAX]; 0xC5, 0xFB, 0x2A, 0xC0, // vcvtsi2sd XMM0, XMM0, EAX; 0xC4, 0xE1, 0xFB, 0x2A, 0xC0, // vcvtsi2sd XMM0, XMM0, RAX; 0xC5, 0xFB, 0x2A, 0x00, // vcvtsi2sd XMM0, XMM0, [RAX]; 0xC5, 0xFA, 0x2D, 0xC0, // vcvtss2si EAX, XMM0; 0xC4, 0xE1, 0xFA, 0x2D, 0xC0, // vcvtss2si RAX, XMM0; 0xC4, 0xE1, 0xFA, 0x2D, 0x00, // vcvtss2si RAX, [RAX]; 0xC5, 0xF9, 0xE6, 0xC0, // vcvttpd2dq XMM0, XMM0; 0xC5, 0xFD, 0xE6, 0xC0, // vcvttpd2dq XMM0, YMM0; 0xC5, 0xF9, 0xE6, 0x00, // vcvttpd2dq XMM0, [RAX]; 0xC5, 0xFA, 0x5B, 0xC0, // vcvttps2dq XMM0, XMM0; 0xC5, 0xFE, 0x5B, 0xC0, // vcvttps2dq YMM0, YMM0; 0xC5, 0xFE, 0x5B, 0x00, // vcvttps2dq YMM0, [RAX]; 0xC5, 0xFB, 0x2C, 0xC0, // vcvttsd2si EAX, XMM0; 0xC4, 0xE1, 0xFB, 0x2C, 0xC0, // vcvttsd2si RAX, XMM0; 0xC4, 0xE1, 0xFB, 0x2C, 0x00, // vcvttsd2si RAX, [RAX]; 0xC5, 0xFA, 0x2C, 0xC0, // vcvttss2si EAX, XMM0; 0xC4, 0xE1, 0xFA, 0x2C, 0xC0, // vcvttss2si RAX, XMM0; 0xC4, 0xE1, 0xFA, 0x2C, 0x00, // vcvttss2si RAX, [RAX]; 0xC4, 0xE2, 0x79, 0x18, 0x00, // vbroadcastss XMM0, [RAX]; 0xC4, 0xE2, 0x7D, 0x18, 0x00, // vbroadcastss YMM0, [RAX]; 0xC4, 0xE2, 0x7D, 0x19, 0x00, // vbroadcastsd YMM0, [RAX]; 0xC4, 0xE2, 0x7D, 0x1A, 0x00, // vbroadcastf128 YMM0, [RAX]; 0xC4, 0xE3, 0x7D, 0x19, 0xC0, 0x00, // vextractf128 XMM0, YMM0, 0; 0xC4, 0xE3, 0x7D, 0x19, 0x00, 0x00, // vextractf128 [RAX], YMM0, 0; 0xC4, 0xE3, 0x79, 0x17, 0xC0, 0x00, // vextractps EAX, XMM0, 0; 0xC4, 0xE3, 0x79, 0x17, 0x00, 0x00, // vextractps [RAX], XMM0, 0; 0xC4, 0xE3, 0x7D, 0x18, 0xC0, 0x00, // vinsertf128 YMM0, YMM0, XMM0, 0; 0xC4, 0xE3, 0x7D, 0x18, 0x00, 0x00, // vinsertf128 YMM0, YMM0, [RAX], 0; 0xC4, 0xE3, 0x79, 0x21, 0xC0, 0x00, // vinsertps XMM0, XMM0, XMM0, 0; 0xC4, 0xE3, 0x79, 0x21, 0x00, 0x00, // vinsertps XMM0, XMM0, [RAX], 0; 0xC4, 0xE3, 0x79, 0x20, 0xC0, 0x00, // vpinsrb XMM0, XMM0, EAX, 0; 0xC4, 0xE3, 0x79, 
0x20, 0x00, 0x00, // vpinsrb XMM0, XMM0, [RAX], 0; 0xC5, 0xF9, 0xC4, 0xC0, 0x00, // vpinsrw XMM0, XMM0, EAX, 0; 0xC5, 0xF9, 0xC4, 0x00, 0x00, // vpinsrw XMM0, XMM0, [RAX], 0; 0xC4, 0xE3, 0x79, 0x22, 0xC0, 0x00, // vpinsrd XMM0, XMM0, EAX, 0; 0xC4, 0xE3, 0x79, 0x22, 0x00, 0x00, // vpinsrd XMM0, XMM0, [RAX], 0; 0xC4, 0xE3, 0xF9, 0x22, 0xC0, 0x00, // vpinsrq XMM0, XMM0, RAX, 0; 0xC4, 0xE3, 0xF9, 0x22, 0x00, 0x00, // vpinsrq XMM0, XMM0, [RAX], 0; 0xC5, 0xFB, 0xF0, 0x00, // vlddqu XMM0, [RAX]; 0xC5, 0xFF, 0xF0, 0x00, // vlddqu YMM0, [RAX]; 0xC5, 0xF9, 0xF7, 0xC0, // vmaskmovdqu XMM0, XMM0; 0xC4, 0xE2, 0x79, 0x2C, 0x00, // vmaskmovps XMM0, XMM0, [RAX]; 0xC4, 0xE2, 0x79, 0x2E, 0x00, // vmaskmovps [RAX], XMM0, XMM0; 0xC4, 0xE2, 0x7D, 0x2C, 0x00, // vmaskmovps YMM0, YMM0, [RAX]; 0xC4, 0xE2, 0x7D, 0x2E, 0x00, // vmaskmovps [RAX], YMM0, YMM0; 0xC4, 0xE2, 0x79, 0x2D, 0x00, // vmaskmovpd XMM0, XMM0, [RAX]; 0xC4, 0xE2, 0x79, 0x2F, 0x00, // vmaskmovpd [RAX], XMM0, XMM0; 0xC4, 0xE2, 0x7D, 0x2D, 0x00, // vmaskmovpd YMM0, YMM0, [RAX]; 0xC4, 0xE2, 0x7D, 0x2F, 0x00, // vmaskmovpd [RAX], YMM0, YMM0; 0xC5, 0xFD, 0x28, 0x00, // vmovapd YMM0, [RAX]; 0xC5, 0x7D, 0x28, 0x00, // vmovapd YMM8, [RAX]; 0xC5, 0x7D, 0x28, 0x47, 0x40, // vmovapd YMM8, 64[RDI]; 0xC5, 0xFD, 0x29, 0x00, // vmovapd [RAX], YMM0; 0xC5, 0x7D, 0x29, 0x00, // vmovapd [RAX], YMM8; 0xC5, 0x7D, 0x29, 0x47, 0x40, // vmovapd 64[RDI], YMM8; 0xC5, 0xFC, 0x28, 0x00, // vmovaps YMM0, [RAX]; 0xC5, 0x7C, 0x28, 0x00, // vmovaps YMM8, [RAX]; 0xC5, 0x7C, 0x28, 0x47, 0x40, // vmovaps YMM8, 64[RDI]; 0xC5, 0xFC, 0x29, 0x00, // vmovaps [RAX], YMM0; 0xC5, 0x7C, 0x29, 0x00, // vmovaps [RAX], YMM8; 0xC5, 0x7C, 0x29, 0x47, 0x40, // vmovaps 64[RDI], YMM8; 0xC5, 0xFD, 0x10, 0x00, // vmovupd YMM0, [RAX]; 0xC5, 0x7D, 0x10, 0x00, // vmovupd YMM8, [RAX]; 0xC5, 0x7D, 0x10, 0x47, 0x40, // vmovupd YMM8, 64[RDI]; 0xC5, 0xFD, 0x11, 0x00, // vmovupd [RAX], YMM0; 0xC5, 0x7D, 0x11, 0x00, // vmovupd [RAX], YMM8; 0xC5, 0x7D, 0x11, 0x47, 0x40, // vmovupd 
64[RDI], YMM8; 0xC5, 0xFC, 0x10, 0x00, // vmovups YMM0, [RAX]; 0xC5, 0x7C, 0x10, 0x00, // vmovups YMM8, [RAX]; 0xC5, 0x7C, 0x10, 0x47, 0x40, // vmovups YMM8, 64[RDI]; 0xC5, 0xFC, 0x11, 0x00, // vmovups [RAX], YMM0; 0xC5, 0x7C, 0x11, 0x00, // vmovups [RAX], YMM8; 0xC5, 0x7C, 0x11, 0x47, 0x40, // vmovups 64[RDI], YMM8; 0xC5, 0xF9, 0x6E, 0xC0, // vmovd XMM0, EAX; 0xC5, 0xF9, 0x6E, 0x00, // vmovd XMM0, [RAX]; 0xC5, 0xF9, 0x7E, 0xC0, // vmovd EAX, XMM0; 0xC5, 0xF9, 0x7E, 0x00, // vmovd [RAX], XMM0; 0xC4, 0xE1, 0xF9, 0x6E, 0xC0, // vmovq XMM0, RAX; 0xC4, 0xE1, 0xF9, 0x6E, 0x00, // vmovq XMM0, [RAX]; 0xC4, 0xE1, 0xF9, 0x7E, 0xC0, // vmovq RAX, XMM0; 0xC4, 0xE1, 0xF9, 0x7E, 0x00, // vmovq [RAX], XMM0; 0xC5, 0xF9, 0x6F, 0xC0, // vmovdqa XMM0, XMM0; 0xC5, 0xF9, 0x6F, 0x00, // vmovdqa XMM0, [RAX]; 0xC5, 0xFD, 0x6F, 0xC0, // vmovdqa YMM0, YMM0; 0xC5, 0xFD, 0x6F, 0x00, // vmovdqa YMM0, [RAX]; 0xC5, 0xF9, 0x6F, 0xC0, // vmovdqa XMM0, XMM0; 0xC5, 0xF9, 0x7F, 0x00, // vmovdqa [RAX], XMM0; 0xC5, 0xFD, 0x6F, 0xC0, // vmovdqa YMM0, YMM0; 0xC5, 0xFD, 0x7F, 0x00, // vmovdqa [RAX],YMM0; 0xC5, 0xFA, 0x6F, 0xC0, // vmovdqu XMM0, XMM0; 0xC5, 0xFA, 0x6F, 0x00, // vmovdqu XMM0, [RAX]; 0xC5, 0xFE, 0x6F, 0xC0, // vmovdqu YMM0, YMM0; 0xC5, 0xFE, 0x6F, 0x00, // vmovdqu YMM0, [RAX]; 0xC5, 0xFA, 0x6F, 0xC0, // vmovdqu XMM0, XMM0; 0xC5, 0xFA, 0x7F, 0x00, // vmovdqu [RAX], XMM0; 0xC5, 0xFE, 0x6F, 0xC0, // vmovdqu YMM0, YMM0; 0xC5, 0xFE, 0x7F, 0x00, // vmovdqu [RAX],YMM0; 0xC5, 0xF8, 0x12, 0xC0, // vmovhlps XMM0, XMM0, XMM0; 0xC5, 0xF8, 0x16, 0xC0, // vmovlhps XMM0, XMM0, XMM0; 0xC5, 0xF9, 0x16, 0x00, // vmovhpd XMM0, XMM0, [RAX]; 0xC5, 0xF9, 0x17, 0x00, // vmovhpd [RAX], XMM0; 0xC5, 0xF8, 0x16, 0x00, // vmovhps XMM0, XMM0, [RAX]; 0xC5, 0xF8, 0x17, 0x00, // vmovhps [RAX], XMM0; 0xC5, 0xF9, 0x12, 0x00, // vmovlpd XMM0, XMM0, [RAX]; 0xC5, 0xF9, 0x13, 0x00, // vmovlpd [RAX], XMM0; 0xC5, 0xF8, 0x12, 0x00, // vmovlps XMM0, XMM0, [RAX]; 0xC5, 0xF8, 0x13, 0x00, // vmovlps [RAX], XMM0; 0xC5, 0xF9, 0xE7, 
0x00, // vmovntdq [RAX], XMM0; 0xC5, 0x7D, 0xE7, 0x00, // vmovntdq [RAX], YMM8; 0xC5, 0xF9, 0x2B, 0x00, // vmovntpd [RAX], XMM0; 0xC5, 0x7D, 0x2B, 0x00, // vmovntpd [RAX], YMM8; 0xC5, 0xF8, 0x2B, 0x00, // vmovntps [RAX], XMM0; 0xC5, 0x7C, 0x2B, 0x00, // vmovntps [RAX], YMM8; 0xC4, 0xE2, 0x79, 0x2A, 0x00, // vmovntdqa XMM0, [RAX]; 0xC5, 0xFB, 0x10, 0xC0, // vmovsd XMM0, XMM0, XMM0; 0xC4, 0x41, 0x3B, 0x10, 0xC0, // vmovsd XMM8, XMM8, XMM8; 0xC5, 0xFB, 0x11, 0x00, // vmovsd [RAX], XMM0; 0xC4, 0x41, 0x7B, 0x11, 0x00, // vmovsd [R8], XMM8; 0xC5, 0xFA, 0x10, 0xC0, // vmovss XMM0, XMM0, XMM0; 0xC4, 0x41, 0x3A, 0x10, 0xC0, // vmovss XMM8, XMM8, XMM8; 0xC5, 0xFA, 0x11, 0x00, // vmovss [RAX], XMM0; 0xC4, 0x41, 0x7A, 0x11, 0x00, // vmovss [R8], XMM8; 0xC5, 0x7A, 0x16, 0xC1, // vmovshdup XMM8, XMM1; 0xC4, 0xC1, 0x7E, 0x16, 0xC0, // vmovshdup YMM0, YMM8; 0xC5, 0xFE, 0x16, 0x00, // vmovshdup YMM0, [RAX]; 0xC5, 0x7A, 0x12, 0xC1, // vmovsldup XMM8, XMM1; 0xC4, 0xC1, 0x7E, 0x12, 0xC0, // vmovsldup YMM0, YMM8; 0xC5, 0xFE, 0x12, 0x00, // vmovsldup YMM0, [RAX]; 0xC5, 0xF1, 0x67, 0xC2, // vpackuswb XMM0, XMM1, XMM2; 0xC5, 0xB9, 0x67, 0x00, // vpackuswb XMM0, XMM8, [RAX]; 0xC4, 0xE2, 0x71, 0x2B, 0xC2, // vpackusdw XMM0, XMM1, XMM2; 0xC4, 0xE2, 0x39, 0x2B, 0x00, // vpackusdw XMM0, XMM8, [RAX]; 0xC5, 0xF1, 0x63, 0xC2, // vpacksswb XMM0, XMM1, XMM2; 0xC5, 0xB9, 0x63, 0x00, // vpacksswb XMM0, XMM8, [RAX]; 0xC5, 0xF1, 0x6B, 0xC2, // vpackssdw XMM0, XMM1, XMM2; 0xC5, 0xB9, 0x6B, 0x00, // vpackssdw XMM0, XMM8, [RAX]; 0xC4, 0xE3, 0x71, 0x0F, 0xC2, 0xFF, // vpalignr XMM0, XMM1, XMM2, 0xFF; 0xC4, 0x63, 0x39, 0x0F, 0x08, 0x10, // vpalignr XMM9, XMM8, [RAX], 0x10; 0xC4, 0xE3, 0x79, 0x14, 0xC0, 0x00, // vpextrb EAX, XMM0, 0x0; 0xC4, 0x43, 0x79, 0x14, 0xCA, 0x0F, // vpextrb R10, XMM9, 0xF; 0xC4, 0x43, 0x79, 0x14, 0x0A, 0x0F, // vpextrb [R10], XMM9, 0xF; 0xC4, 0xE3, 0x79, 0x16, 0xC0, 0x00, // vpextrd EAX, XMM0, 0x0; 0xC4, 0x43, 0x79, 0x16, 0xC8, 0x0F, // vpextrd R8D, XMM9, 0xF; 0xC4, 0x43, 0x79, 0x16, 
0x0A, 0x0F, // vpextrd [R10], XMM9, 0xF; 0xC4, 0xE3, 0xF9, 0x16, 0xC0, 0x00, // vpextrq RAX, XMM0, 0x0; 0xC4, 0x43, 0xF9, 0x16, 0xCA, 0x0F, // vpextrq R10, XMM9, 0xF; 0xC4, 0x43, 0xF9, 0x16, 0x0A, 0x0F, // vpextrq [R10], XMM9, 0xF; 0xC5, 0xF9, 0xC5, 0xCA, 0x03, // vpextrw ECX, XMM2, 0x3; 0xC5, 0xF9, 0xC5, 0xC0, 0x00, // vpextrw EAX, XMM0, 0x0; 0xC4, 0x41, 0x79, 0xC5, 0xD1, 0x0F, // vpextrw R10, XMM9, 0xF; 0xC4, 0x43, 0x79, 0x15, 0x0A, 0x0F, // vpextrw [R10], XMM9, 0xF; 0xC4, 0xE2, 0x79, 0x20, 0xC0, // vpmovsxbw XMM0, XMM0; 0xC4, 0x42, 0x79, 0x20, 0x00, // vpmovsxbw XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x21, 0xC0, // vpmovsxbd XMM0, XMM0; 0xC4, 0x42, 0x79, 0x21, 0x00, // vpmovsxbd XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x22, 0xC0, // vpmovsxbq XMM0, XMM0; 0xC4, 0x42, 0x79, 0x22, 0x00, // vpmovsxbq XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x23, 0xC0, // vpmovsxwd XMM0, XMM0; 0xC4, 0x42, 0x79, 0x23, 0x00, // vpmovsxwd XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x24, 0xC0, // vpmovsxwq XMM0, XMM0; 0xC4, 0x42, 0x79, 0x24, 0x00, // vpmovsxwq XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x25, 0xC0, // vpmovsxdq XMM0, XMM0; 0xC4, 0x42, 0x79, 0x25, 0x00, // vpmovsxdq XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x30, 0xC0, // vpmovzxbw XMM0, XMM0; 0xC4, 0x42, 0x79, 0x30, 0x00, // vpmovzxbw XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x31, 0xC0, // vpmovzxbd XMM0, XMM0; 0xC4, 0x42, 0x79, 0x31, 0x00, // vpmovzxbd XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x32, 0xC0, // vpmovzxbq XMM0, XMM0; 0xC4, 0x42, 0x79, 0x32, 0x00, // vpmovzxbq XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x33, 0xC0, // vpmovzxwd XMM0, XMM0; 0xC4, 0x42, 0x79, 0x33, 0x00, // vpmovzxwd XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x34, 0xC0, // vpmovzxwq XMM0, XMM0; 0xC4, 0x42, 0x79, 0x34, 0x00, // vpmovzxwq XMM8, [R8]; 0xC4, 0xE2, 0x79, 0x35, 0xC0, // vpmovzxdq XMM0, XMM0; 0xC4, 0x42, 0x79, 0x35, 0x00, // vpmovzxdq XMM8, [R8]; 0xC5, 0xF9, 0x54, 0xC0, // vandpd XMM0, XMM0, XMM0; 0xC4, 0x41, 0x39, 0x54, 0x08, // vandpd XMM9, XMM8, [R8]; 0xC5, 0xF8, 0x54, 0xC0, // vandps XMM0, XMM0, XMM0; 0xC4, 0x41, 0x38, 0x54, 0x08, // vandps 
XMM9, XMM8, [R8]; 0xC5, 0xF9, 0x55, 0xC0, // vandnpd XMM0, XMM0, XMM0; 0xC4, 0x41, 0x39, 0x55, 0x08, // vandnpd XMM9, XMM8, [R8]; 0xC5, 0xF8, 0x55, 0xC0, // vandnps XMM0, XMM0, XMM0; 0xC4, 0x41, 0x38, 0x55, 0x08, // vandnps XMM9, XMM8, [R8]; 0xC5, 0xF9, 0x56, 0xC0, // vorpd XMM0, XMM0, XMM0; 0xC4, 0x41, 0x39, 0x56, 0x08, // vorpd XMM9, XMM8, [R8]; 0xC5, 0xF8, 0x56, 0xC0, // vorps XMM0, XMM0, XMM0; 0xC4, 0x41, 0x38, 0x56, 0x08, // vorps XMM9, XMM8, [R8]; 0xC5, 0xF9, 0xDB, 0xC0, // vpand XMM0, XMM0, XMM0; 0xC4, 0x41, 0x39, 0xDB, 0x08, // vpand XMM9, XMM8, [R8]; 0xC5, 0xF9, 0xDF, 0xC0, // vpandn XMM0, XMM0, XMM0; 0xC4, 0x41, 0x39, 0xDF, 0x08, // vpandn XMM9, XMM8, [R8]; 0xC5, 0xF9, 0xEB, 0xC0, // vpor XMM0, XMM0, XMM0; 0xC4, 0x41, 0x39, 0xEB, 0x0A, // vpor XMM9, XMM8, [R10]; 0xC5, 0xF9, 0xEF, 0xC0, // vpxor XMM0, XMM0, XMM0; 0xC4, 0x41, 0x39, 0xEF, 0x0A, // vpxor XMM9, XMM8, [R10]; 0xC4, 0xE2, 0x79, 0x17, 0xC0, // vptest XMM0, XMM0; 0xC4, 0x62, 0x79, 0x17, 0x00, // vptest XMM8, [RAX]; 0xC4, 0x42, 0x7D, 0x17, 0xC0, // vptest YMM8, YMM8; 0xC4, 0xC2, 0x7D, 0x17, 0x00, // vptest YMM0, [R8]; 0xC5, 0xF9, 0x2E, 0xC0, // vucomisd XMM0, XMM0; 0xC5, 0x79, 0x2E, 0x00, // vucomisd XMM8, [RAX] 0xC5, 0xF8, 0x2E, 0xC0, // vucomiss YMM8, YMM8; 0xC5, 0x78, 0x2E, 0x00, // vucomiss YMM0, [R8]; 0xC5, 0xB9, 0x57, 0xC0, // vxorpd XMM0, XMM8, XMM0; 0xC5, 0x79, 0x57, 0x00, // vxorpd XMM8, XMM0, [RAX]; 0xC5, 0xBD, 0x57, 0xC0, // vxorpd YMM0, YMM8, YMM0; 0xC5, 0x7D, 0x57, 0x00, // vxorpd YMM8, YMM0, [RAX]; 0xC5, 0xB8, 0x57, 0xC0, // vxorps XMM0, XMM8, XMM0; 0xC5, 0x78, 0x57, 0x00, // vxorps XMM8, XMM0, [RAX]; 0xC5, 0xBC, 0x57, 0xC0, // vxorps YMM0, YMM8, YMM0; 0xC5, 0x7C, 0x57, 0x00, // vxorps YMM8, YMM0, [RAX]; 0xC4, 0xE3, 0x71, 0x0D, 0xC2, 0x00, // vblendpd XMM0, XMM1, XMM2, 0x00; 0xC4, 0x63, 0x39, 0x0D, 0x08, 0xFF, // vblendpd XMM9, XMM8, [RAX], 0xFF; 0xC4, 0xE3, 0x75, 0x0D, 0xC2, 0x00, // vblendpd YMM0, YMM1, YMM2, 0x00; 0xC4, 0x63, 0x3D, 0x0D, 0x08, 0xFF, // vblendpd YMM9, YMM8, [RAX], 
0xFF; 0xC4, 0xE3, 0x71, 0x0C, 0xC2, 0x00, // vblendps XMM0, XMM1, XMM2, 0x00; 0xC4, 0x63, 0x39, 0x0C, 0x08, 0xFF, // vblendps XMM9, XMM8, [RAX], 0xFF; 0xC4, 0xE3, 0x75, 0x0C, 0xC2, 0x00, // vblendps YMM0, YMM1, YMM2, 0x00; 0xC4, 0x63, 0x3D, 0x0C, 0x08, 0xFF, // vblendps YMM9, YMM8, [RAX], 0xFF; 0xC4, 0xE3, 0x71, 0x4B, 0xC2, 0x00, // vblendvpd XMM0, XMM1, XMM2, 0x00; 0xC4, 0x63, 0x39, 0x4B, 0x08, 0xff, // vblendvpd XMM9, XMM8, [RAX], 0xFF; 0xC4, 0xE3, 0x75, 0x4B, 0xC2, 0x00, // vblendvpd YMM0, YMM1, YMM2, 0x00; 0xC4, 0x63, 0x3D, 0x4B, 0x08, 0xff, // vblendvpd YMM9, YMM8, [RAX], 0xFF; 0xC4, 0xE3, 0x71, 0x4A, 0xC2, 0x00, // vblendvps XMM0, XMM1, XMM2, 0x00; 0xC4, 0x63, 0x39, 0x4A, 0x08, 0xff, // vblendvps XMM9, XMM8, [RAX], 0xFF; 0xC4, 0xE3, 0x75, 0x4A, 0xC2, 0x00, // vblendvps YMM0, YMM1, YMM2, 0x00; 0xC4, 0x63, 0x3D, 0x4A, 0x08, 0xff, // vblendvps YMM9, YMM8, [RAX], 0xFF; 0xC5, 0x7B, 0x12, 0xC0, // vmovddup XMM8, XMM0; 0xC5, 0xFB, 0x12, 0x00, // vmovddup XMM0, [RAX]; 0xC4, 0xC1, 0x7F, 0x12, 0xC0, // vmovddup YMM0, YMM8; 0xC4, 0xC1, 0x7F, 0x12, 0x02, // vmovddup YMM0, [R10]; 0xC4, 0xE3, 0x39, 0x4C, 0xC0, 0x00, // vpblendvb XMM0, XMM8, XMM0, 0x00; 0xC4, 0x63, 0x79, 0x4C, 0x00, 0x00, // vpblendvb XMM8, XMM0, [RAX], 0x00; 0xC4, 0x43, 0x79, 0x4C, 0x02, 0x00, // vpblendvb XMM8, XMM0, [R10], 0x00; 0xC4, 0xE3, 0x39, 0x0E, 0xC0, 0x00, // vpblendw XMM0, XMM8, XMM0, 0x00; 0xC4, 0x63, 0x79, 0x0E, 0x00, 0x00, // vpblendw XMM8, XMM0, [RAX], 0x00; 0xC4, 0x43, 0x79, 0x0E, 0x02, 0x00, // vpblendw XMM8, XMM0, [R10], 0x00; 0xC4, 0xE2, 0x71, 0x0D, 0xC2, // vpermilpd XMM0, XMM1, XMM2; 0xC4, 0xE2, 0x71, 0x0D, 0x00, // vpermilpd XMM0, XMM1, [RAX]; 0xC4, 0xE3, 0x79, 0x05, 0xC1, 0x00, // vpermilpd XMM0, XMM1, 0x00; 0xC4, 0xE3, 0x79, 0x05, 0x00, 0x00, // vpermilpd XMM0, [RAX], 0x00; 0xC4, 0xE2, 0x71, 0x0C, 0xC2, // vpermilps XMM0, XMM1, XMM2; 0xC4, 0xE2, 0x71, 0x0C, 0x00, // vpermilps XMM0, XMM1, [RAX]; 0xC4, 0xE3, 0x79, 0x04, 0xC1, 0x00, // vpermilps XMM0, XMM1, 0x00; 0xC4, 0xE3, 0x79, 
0x04, 0x00, 0x00, // vpermilps XMM0, [RAX], 0x00; 0xC4, 0xE3, 0x75, 0x06, 0xC2, 0x00, // vperm2f128 YMM0, YMM1, YMM2, 0x00; 0xC4, 0xE3, 0x75, 0x06, 0x00, 0x00, // vperm2f128 YMM0, YMM1, [RAX], 0x00; 0xC4, 0x43, 0x35, 0x06, 0x02, 0x00, // vperm2f128 YMM8, YMM9, [R10], 0x00; 0xC4, 0xE2, 0x79, 0x00, 0xC0, // vpshufb XMM0, XMM0, XMM0; 0xC4, 0x42, 0x39, 0x00, 0x08, // vpshufb XMM9, XMM8, [R8]; 0xC5, 0xF9, 0x70, 0xC0, 0x00, // vpshufd XMM0, XMM0, 0x0; 0xC4, 0x41, 0x79, 0x70, 0x00, 0x00, // vpshufd XMM8, [R8], 0x0; 0xC5, 0xFA, 0x70, 0xC0, 0x00, // vpshufhw XMM0, XMM0, 0x0; 0xC4, 0x41, 0x7A, 0x70, 0x00, 0x00, // vpshufhw XMM8, [R8], 0x0; 0xC5, 0xFB, 0x70, 0xC0, 0x00, // vpshuflw XMM0, XMM0, 0x0; 0xC4, 0x41, 0x7B, 0x70, 0x00, 0x00, // vpshuflw XMM8, [R8], 0x0; 0xC5, 0xF1, 0x68, 0xC2, // vpunpckhbw XMM0, XMM1, XMM2; 0xC4, 0x41, 0x39, 0x68, 0x00, // vpunpckhbw XMM8, XMM8, [R8]; 0xC5, 0xF1, 0x69, 0xC2, // vpunpckhwd XMM0, XMM1, XMM2; 0xC4, 0x41, 0x39, 0x69, 0x00, // vpunpckhwd XMM8, XMM8, [R8]; 0xC5, 0xF1, 0x6A, 0xC2, // vpunpckhdq XMM0, XMM1, XMM2; 0xC4, 0x41, 0x39, 0x6A, 0x00, // vpunpckhdq XMM8, XMM8, [R8]; 0xC5, 0xF1, 0x6D, 0xC2, // vpunpckhqdq XMM0, XMM1, XMM2; 0xC4, 0x41, 0x39, 0x6D, 0x00, // vpunpckhqdq XMM8, XMM8, [R8]; 0xC5, 0xF1, 0x60, 0xC2, // vpunpcklbw XMM0, XMM1, XMM2; 0xC4, 0x41, 0x39, 0x60, 0x00, // vpunpcklbw XMM8, XMM8, [R8]; 0xC5, 0xF1, 0x61, 0xC2, // vpunpcklwd XMM0, XMM1, XMM2; 0xC4, 0x41, 0x39, 0x61, 0x00, // vpunpcklwd XMM8, XMM8, [R8]; 0xC5, 0xF1, 0x62, 0xC2, // vpunpckldq XMM0, XMM1, XMM2; 0xC4, 0x41, 0x39, 0x62, 0x00, // vpunpckldq XMM8, XMM8, [R8]; 0xC5, 0xF1, 0x6C, 0xC2, // vpunpcklqdq XMM0, XMM1, XMM2; 0xC4, 0x41, 0x39, 0x6C, 0x00, // vpunpcklqdq XMM8, XMM8, [R8]; 0xC5, 0xF9, 0xC6, 0xC0, 0x00, // vshufpd XMM0, XMM0, XMM0, 0x00; 0xC4, 0xC1, 0x39, 0xC6, 0x00, 0x00, // vshufpd XMM0, XMM8, [R8], 0x00; 0xC4, 0x41, 0x7D, 0xC6, 0xC0, 0x00, // vshufpd YMM8, YMM0, YMM8, 0x00; 0xC5, 0x7D, 0xC6, 0x00, 0x00, // vshufpd YMM8, YMM0, [RAX], 0x00; 0xC5, 0xF8, 
0xC6, 0xC0, 0x00, // vshufps XMM0, XMM0, XMM0, 0x00; 0xC4, 0xC1, 0x38, 0xC6, 0x00, 0x00, // vshufps XMM0, XMM8, [R8], 0x00; 0xC4, 0x41, 0x7C, 0xC6, 0xC0, 0x00, // vshufps YMM8, YMM0, YMM8, 0x00; 0xC5, 0x7C, 0xC6, 0x00, 0x00, // vshufps YMM8, YMM0, [RAX], 0x00; 0xC5, 0xF9, 0x15, 0xC0, // vunpckhpd XMM0, XMM0, XMM0; 0xC5, 0x39, 0x15, 0x00, // vunpckhpd XMM8, XMM8, [RAX]; 0xC4, 0x41, 0x7D, 0x15, 0x00, // vunpckhpd YMM8, YMM0, [R8]; 0xC4, 0xC1, 0x3D, 0x15, 0x00, // vunpckhpd YMM0, YMM8, [R8]; 0xC5, 0xF8, 0x15, 0xC0, // vunpckhps XMM0, XMM0, XMM0; 0xC5, 0x38, 0x15, 0x00, // vunpckhps XMM8, XMM8, [RAX]; 0xC4, 0x41, 0x7C, 0x15, 0x00, // vunpckhps YMM8, YMM0, [R8]; 0xC4, 0xC1, 0x3C, 0x15, 0x00, // vunpckhps YMM0, YMM8, [R8]; 0xC5, 0xF9, 0x14, 0xC0, // vunpcklpd XMM0, XMM0, XMM0; 0xC5, 0x39, 0x14, 0x00, // vunpcklpd XMM8, XMM8, [RAX]; 0xC4, 0x41, 0x7D, 0x14, 0x00, // vunpcklpd YMM8, YMM0, [R8]; 0xC4, 0xC1, 0x3D, 0x14, 0x00, // vunpcklpd YMM0, YMM8, [R8]; 0xC5, 0xF8, 0x14, 0xC0, // vunpcklps XMM0, XMM0, XMM0; 0xC5, 0x38, 0x14, 0x00, // vunpcklps XMM8, XMM8, [RAX]; 0xC4, 0x41, 0x7C, 0x14, 0x00, // vunpcklps YMM8, YMM0, [R8]; 0xC4, 0xC1, 0x3C, 0x14, 0x00, // vunpcklps YMM0, YMM8, [R8]; /* AES */ 0x66, 0x0F, 0x38, 0xDC, 0xC0, // aesenc XMM0, XMM0; 0x66, 0x0F, 0x38, 0xDC, 0x00, // aesenc XMM0, [RAX]; 0xC4, 0xE2, 0x79, 0xDC, 0xC0, // vaesenc XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0xDC, 0x00, // vaesenc XMM0, XMM0, [RAX]; 0x66, 0x0F, 0x38, 0xDD, 0xC0, // aesenclast XMM0, XMM0; 0x66, 0x0F, 0x38, 0xDD, 0x00, // aesenclast XMM0, [RAX]; 0xC4, 0xE2, 0x79, 0xDD, 0xC0, // vaesenclast XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0xDD, 0x00, // vaesenclast XMM0, XMM0, [RAX]; 0x66, 0x0F, 0x38, 0xDE, 0xC0, // aesdec XMM0, XMM0; 0x66, 0x0F, 0x38, 0xDE, 0x00, // aesdec XMM0, [RAX]; 0xC4, 0xE2, 0x79, 0xDE, 0xC0, // vaesdec XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0xDE, 0x00, // vaesdec XMM0, XMM0, [RAX]; 0x66, 0x0F, 0x38, 0xDF, 0xC0, // aesdeclast XMM0, XMM0; 0x66, 0x0F, 0x38, 0xDF, 0x00, // aesdeclast XMM0, 
[RAX]; 0xC4, 0xE2, 0x79, 0xDF, 0xC0, // vaesdeclast XMM0, XMM0, XMM0; 0xC4, 0xE2, 0x79, 0xDF, 0x00, // vaesdeclast XMM0, XMM0, [RAX]; 0x66, 0x0F, 0x38, 0xDB, 0xC0, // aesimc XMM0, XMM0; 0x66, 0x0F, 0x38, 0xDB, 0x00, // aesimc XMM0, [RAX]; 0xC4, 0xE2, 0x79, 0xDB, 0xC0, // vaesimc XMM0, XMM0; 0xC4, 0xE2, 0x79, 0xDB, 0x00, // vaesimc XMM0, [RAX]; 0x66, 0x0F, 0x3A, 0xDF, 0xC0, 0x00, // aeskeygenassist XMM0, XMM0, 0x0; 0x66, 0x0F, 0x3A, 0xDF, 0x00, 0x00, // aeskeygenassist XMM0, [RAX], 0x0; 0xC4, 0xE3, 0x79, 0xDF, 0xC0, 0x00, // vaeskeygenassist XMM0, XMM0, 0x0; 0xC4, 0xE3, 0x79, 0xDF, 0x00, 0x00, // vaeskeygenassist XMM0, [RAX], 0x0; /* FSGSBASE */ 0xf3, 0x48, 0x0f, 0xae, 0xc0, // rdfsbase RAX; 0xf3, 0x49, 0x0f, 0xae, 0xc7, // rdfsbase R15; 0xf3, 0x48, 0x0f, 0xae, 0xc8, // rdgsbase RAX; 0xf3, 0x49, 0x0f, 0xae, 0xcf, // rdgsbase R15; 0xf3, 0x48, 0x0f, 0xae, 0xd0, // wrfsbase RAX; 0xf3, 0x49, 0x0f, 0xae, 0xd7, // wrfsbase R15; 0xf3, 0x48, 0x0f, 0xae, 0xd8, // wrgsbase RAX; 0xf3, 0x49, 0x0f, 0xae, 0xdf, // wrgsbase R15; /* RDRAND */ 0x66, 0x0f, 0xc7, 0xf0, // rdrand AX; 0x0f, 0xc7, 0xf0, // rdrand EAX; 0x48, 0x0f, 0xc7, 0xf0, // rdrand RAX; 0x66, 0x41, 0x0f, 0xc7, 0xf7, // rdrand R15W; 0x41, 0x0f, 0xc7, 0xf7, // rdrand R15D; 0x49, 0x0f, 0xc7, 0xf7, // rdrand R15; /* RDSEED */ 0x66, 0x0f, 0xc7, 0xf8, // rdseed AX; 0x0f, 0xc7, 0xf8, // rdseed EAX; 0x48, 0x0f, 0xc7, 0xf8, // rdseed RAX; 0x66, 0x41, 0x0f, 0xc7, 0xff, // rdseed R15W; 0x41, 0x0f, 0xc7, 0xff, // rdseed R15D; 0x49, 0x0f, 0xc7, 0xff, // rdseed R15; /* FP16C */ 0xc4, 0xe2, 0x79, 0x13, 0xc0, // vcvtph2ps XMM0, XMM0; 0xc4, 0xe2, 0x79, 0x13, 0x00, // vcvtph2ps XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0x13, 0xc0, // vcvtph2ps YMM0, XMM0; 0xc4, 0x42, 0x7d, 0x13, 0x00, // vcvtph2ps YMM8, [R8]; 0xc4, 0xe3, 0x79, 0x1d, 0xc0, 0x00, // vcvtps2ph XMM0, XMM0, 0x0; 0xc4, 0xe3, 0x79, 0x1d, 0x00, 0x00, // vcvtps2ph [RAX], XMM0, 0x0; 0xc4, 0xe3, 0x7d, 0x1d, 0xc0, 0x00, // vcvtps2ph XMM0, YMM0, 0x0; 0xc4, 0x43, 0x7d, 0x1d, 0x00, 0x00, // 
vcvtps2ph [R8], YMM8, 0x0; /* FMA */ 0xc4, 0xe2, 0xf9, 0x98, 0xc0, // vfmadd132pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0x98, 0x00, // vfmadd132pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0x98, 0xc0, // vfmadd132pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0x98, 0x00, // vfmadd132pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0x98, 0xc0, // vfmadd132ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0x98, 0x00, // vfmadd132ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0x98, 0xc0, // vfmadd132ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0x98, 0x00, // vfmadd132ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0x99, 0xc0, // vfmadd132sd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0x99, 0x00, // vfmadd132sd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x79, 0x99, 0xc0, // vfmadd132ss XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0x99, 0x00, // vfmadd132ss XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xf9, 0xA8, 0xc0, // vfmadd213pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xA8, 0x00, // vfmadd213pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0xA8, 0xc0, // vfmadd213pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0xA8, 0x00, // vfmadd213pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0xA8, 0xc0, // vfmadd213ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xA8, 0x00, // vfmadd213ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0xA8, 0xc0, // vfmadd213ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0xA8, 0x00, // vfmadd213ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0xA9, 0xc0, // vfmadd213sd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xA9, 0x00, // vfmadd213sd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x79, 0xA9, 0xc0, // vfmadd213ss XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xA9, 0x00, // vfmadd213ss XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xf9, 0xB8, 0xc0, // vfmadd231pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xB8, 0x00, // vfmadd231pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0xB8, 0xc0, // vfmadd231pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0xB8, 0x00, // vfmadd231pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0xB8, 0xc0, // vfmadd231ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xB8, 0x00, // vfmadd231ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0xB8, 0xc0, // vfmadd231ps YMM0, YMM0, YMM0; 
0xc4, 0x42, 0x3d, 0xB8, 0x00, // vfmadd231ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0xB9, 0xc0, // vfmadd231sd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xB9, 0x00, // vfmadd231sd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x79, 0xB9, 0xc0, // vfmadd231ss XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xB9, 0x00, // vfmadd231ss XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xf9, 0x96, 0xc0, // vfmaddsub132pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0x96, 0x00, // vfmaddsub132pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0x96, 0xc0, // vfmaddsub132pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0x96, 0x00, // vfmaddsub132pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0x96, 0xc0, // vfmaddsub132ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0x96, 0x00, // vfmaddsub132ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0x96, 0xc0, // vfmaddsub132ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0x96, 0x00, // vfmaddsub132ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0xA6, 0xc0, // vfmaddsub213pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xA6, 0x00, // vfmaddsub213pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0xA6, 0xc0, // vfmaddsub213pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0xA6, 0x00, // vfmaddsub213pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0xA6, 0xc0, // vfmaddsub213ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xA6, 0x00, // vfmaddsub213ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0xA6, 0xc0, // vfmaddsub213ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0xA6, 0x00, // vfmaddsub213ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0xB6, 0xc0, // vfmaddsub231pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xB6, 0x00, // vfmaddsub231pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0xB6, 0xc0, // vfmaddsub231pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0xB6, 0x00, // vfmaddsub231pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0xB6, 0xc0, // vfmaddsub231ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xB6, 0x00, // vfmaddsub231ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0xB6, 0xc0, // vfmaddsub231ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0xB6, 0x00, // vfmaddsub231ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0x97, 0xc0, // vfmsubadd132pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0x97, 
0x00, // vfmsubadd132pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0x97, 0xc0, // vfmsubadd132pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0x97, 0x00, // vfmsubadd132pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0x97, 0xc0, // vfmsubadd132ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0x97, 0x00, // vfmsubadd132ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0x97, 0xc0, // vfmsubadd132ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0x97, 0x00, // vfmsubadd132ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0xA7, 0xc0, // vfmsubadd213pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xA7, 0x00, // vfmsubadd213pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0xA7, 0xc0, // vfmsubadd213pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0xA7, 0x00, // vfmsubadd213pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0xA7, 0xc0, // vfmsubadd213ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xA7, 0x00, // vfmsubadd213ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0xA7, 0xc0, // vfmsubadd213ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0xA7, 0x00, // vfmsubadd213ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0xB7, 0xc0, // vfmsubadd231pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xB7, 0x00, // vfmsubadd231pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0xB7, 0xc0, // vfmsubadd231pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0xB7, 0x00, // vfmsubadd231pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0xB7, 0xc0, // vfmsubadd231ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xB7, 0x00, // vfmsubadd231ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0xB7, 0xc0, // vfmsubadd231ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0xB7, 0x00, // vfmsubadd231ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0x9A, 0xc0, // vfmsub132pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0x9A, 0x00, // vfmsub132pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0x9A, 0xc0, // vfmsub132pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0x9A, 0x00, // vfmsub132pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0x9A, 0xc0, // vfmsub132ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0x9A, 0x00, // vfmsub132ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0x9A, 0xc0, // vfmsub132ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0x9A, 0x00, // vfmsub132ps YMM8, YMM8, 
[R8]; 0xc4, 0xe2, 0xf9, 0x9B, 0xc0, // vfmsub132sd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0x9B, 0x00, // vfmsub132sd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x79, 0x9B, 0xc0, // vfmsub132ss XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0x9B, 0x00, // vfmsub132ss XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xf9, 0xAA, 0xc0, // vfmsub213pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xAA, 0x00, // vfmsub213pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0xAA, 0xc0, // vfmsub213pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0xAA, 0x00, // vfmsub213pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0xAA, 0xc0, // vfmsub213ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xAA, 0x00, // vfmsub213ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0xAA, 0xc0, // vfmsub213ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0xAA, 0x00, // vfmsub213ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0xAB, 0xc0, // vfmsub213sd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xAB, 0x00, // vfmsub213sd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x79, 0xAB, 0xc0, // vfmsub213ss XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xAB, 0x00, // vfmsub213ss XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xf9, 0xBA, 0xc0, // vfmsub231pd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xBA, 0x00, // vfmsub231pd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0xfd, 0xBA, 0xc0, // vfmsub231pd YMM0, YMM0, YMM0; 0xc4, 0x42, 0xbd, 0xBA, 0x00, // vfmsub231pd YMM8, YMM8, [R8]; 0xc4, 0xe2, 0x79, 0xBA, 0xc0, // vfmsub231ps XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xBA, 0x00, // vfmsub231ps XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x7d, 0xBA, 0xc0, // vfmsub231ps YMM0, YMM0, YMM0; 0xc4, 0x42, 0x3d, 0xBA, 0x00, // vfmsub231ps YMM8, YMM8, [R8]; 0xc4, 0xe2, 0xf9, 0xBB, 0xc0, // vfmsub231sd XMM0, XMM0, XMM0; 0xc4, 0xe2, 0xf9, 0xBB, 0x00, // vfmsub231sd XMM0, XMM0, [RAX]; 0xc4, 0xe2, 0x79, 0xBB, 0xc0, // vfmsub231ss XMM0, XMM0, XMM0; 0xc4, 0xe2, 0x79, 0xBB, 0x00, // vfmsub231ss XMM0, XMM0, [RAX]; // 0x58, // pop RAX ]; asm { call L1; xgetbv; xsetbv; xrstor [RAX]; xrstor64 [RAX]; xsave [RAX]; xsave64 [RAX]; xsavec [RCX]; xsavec64 [RCX]; xsaveopt [RAX]; xsaveopt64 [RAX]; vldmxcsr [RAX]; vstmxcsr [RAX]; vaddss 
XMM0, XMM1, XMM2; vaddsd XMM0, XMM15, [RAX]; vaddps XMM12, XMM0, XMM0; vaddpd XMM8, XMM8, XMM8; vsubss XMM0, XMM1, XMM2; vsubsd XMM0, XMM15, [RAX]; vsubps XMM12, XMM0, XMM0; vsubpd XMM8, XMM8, XMM8; vaddsubps XMM0, XMM1, XMM2; vaddsubps YMM0, YMM1, YMM2; vaddsubpd YMM8, YMM1, YMM2; vaddsubpd YMM15, YMM15, 64[RAX]; vdpps YMM0, YMM0, YMM0, 0; vdppd XMM0, XMM0, XMM0, 0x88; vhaddpd YMM0, YMM8, [RDI]; vhaddps XMM0, XMM8, XMM1; vmaxpd YMM0, YMM0, YMM1; vmaxpd XMM0, XMM0, [RAX]; vmaxps YMM0, YMM0, YMM1; vmaxps XMM0, XMM0, [RAX]; vmaxsd XMM0, XMM0, [RAX]; vmaxss XMM0, XMM0, [RAX]; vminpd YMM0, YMM0, YMM1; vminpd XMM0, XMM0, [RAX]; vminps YMM0, YMM0, YMM1; vminps XMM0, XMM0, [RAX]; vminsd XMM0, XMM0, [RAX]; vminss XMM0, XMM0, [RAX]; vmovmskpd EAX, XMM0; vmovmskpd EDI, YMM0; vmovmskps EAX, YMM15; vmovmskps R8D, YMM0; vpmovmskb EAX, XMM0; vmpsadbw XMM0, XMM1, XMM2, 0x00; vmpsadbw XMM8, XMM9, XMM10, 0xFF; vpabsb XMM0, [RAX]; vpabsw XMM1, XMM15; vpabsd XMM1, [RBX]; vpaddb XMM0, XMM0, [RAX]; vpaddw XMM8, XMM8, XMM15; vpaddd XMM8, XMM8, [RBX]; vpaddq XMM0, XMM0, XMM0; vpsubb XMM0, XMM0, [RAX]; vpsubw XMM8, XMM8, XMM15; vpsubd XMM8, XMM8, [RBX]; vpsubq XMM0, XMM0, XMM0; vpaddsb XMM0, XMM0, XMM0; vpaddsw XMM0, XMM0, XMM0; vpaddusb XMM0, XMM0, XMM0; vpaddusw XMM0, XMM0, XMM0; vpsubsb XMM0, XMM0, XMM0; vpsubsw XMM0, XMM0, XMM0; vpsubusb XMM0, XMM0, XMM0; vpsubusw XMM0, XMM0, XMM0; vpavgb XMM0, XMM0, XMM0; vpavgw XMM0, XMM0, XMM0; pclmulqdq XMM0, 64[RAX + 4 * RCX], 0; vpclmulqdq XMM0, XMM0, 64[RAX + 4 * RCX], 0; vphaddw XMM0, XMM0, XMM0; vphaddd XMM0, XMM0, XMM0; vphsubw XMM0, XMM0, XMM0; vphsubd XMM0, XMM0, XMM0; vphaddsw XMM0, XMM0, XMM0; vphsubsw XMM0, XMM0, XMM0; vphminposuw XMM0, XMM0; vpmaddwd XMM0, XMM0, XMM0; vpmaddubsw XMM0, XMM0, XMM0; vpmaxsb XMM0, XMM0, XMM0; vpmaxsd XMM0, XMM0, XMM0; vpmaxsw XMM0, XMM0, XMM0; vpmaxub XMM0, XMM0, XMM0; vpmaxud XMM0, XMM0, XMM0; vpmaxuw XMM0, XMM0, XMM0; vpminsb XMM0, XMM0, XMM0; vpminsd XMM0, XMM0, XMM0; vpminsw XMM0, XMM0, XMM0; vpminub 
XMM0, XMM0, XMM0; vpminud XMM0, XMM0, XMM0; vpminuw XMM0, XMM0, XMM0; vpmulhrsw XMM0, XMM0, XMM0; vpmulhuw XMM0, XMM0, XMM0; vpmulhw XMM0, XMM0, XMM0; vpmulld XMM0, XMM0, XMM0; vpmullw XMM0, XMM0, XMM0; vpmuludq XMM0, XMM0, XMM0; vpmuldq XMM0, XMM0, XMM0; vpsadbw XMM0, XMM0, XMM0; vpsignb XMM0, XMM0, XMM0; vpsignw XMM0, XMM0, XMM0; vpsignd XMM0, XMM0, XMM0; vpslldq XMM0, XMM0, 0; vpsllw XMM0, XMM0, 0; vpsllw XMM0, XMM0, XMM0; vpslld XMM0, XMM0, 0; vpslld XMM0, XMM0, XMM0; vpsllq XMM0, XMM0, 0; vpsllq XMM0, XMM0, XMM0; vpsraw XMM0, XMM0, 0; vpsraw XMM0, XMM0, XMM0; vpsrad XMM0, XMM0, 0; vpsrad XMM0, XMM0, XMM0; vpsrldq XMM0, XMM0, 0; vpsrlw XMM0, XMM0, 0; vpsrlw XMM0, XMM0, XMM0; vpsrld XMM0, XMM0, 0; vpsrld XMM0, XMM0, XMM0; vpsrlq XMM0, XMM0, 0; vpsrlq XMM0, XMM0, XMM0; vrcpps XMM0, XMM1; vrcpps YMM0, YMM1; vrcpss XMM0, XMM0, XMM1; vroundpd XMM0, XMM0, 0; vroundpd YMM0, YMM0, 0; vroundps XMM0, XMM0, 0; vroundps YMM0, YMM0, 0; vroundsd XMM0, XMM0, XMM0, 0; vroundss XMM0, XMM0, XMM0, 0; vsqrtpd XMM0, XMM0; vsqrtpd YMM0, YMM0; vsqrtps XMM0, XMM0; vsqrtps YMM0, YMM0; vsqrtsd XMM0, XMM0, XMM0; vsqrtss XMM0, XMM0, XMM0; vzeroall; vzeroupper; vcmppd XMM0, XMM0, XMM0, 0; vcmppd YMM0, YMM0, YMM0, 0; vcmpps XMM0, XMM0, XMM0, 0; vcmpps YMM0, YMM0, YMM0, 0; vcmpsd XMM0, XMM0, XMM0, 0; vcmpss XMM0, XMM0, XMM0, 0; vcomisd XMM0, XMM0; vcomiss XMM0, XMM0; vpcmpeqb XMM0, XMM0, XMM0; vpcmpeqw XMM0, XMM0, XMM0; vpcmpeqd XMM0, XMM0, XMM0; vpcmpeqq XMM0, XMM0, XMM0; vpcmpgtb XMM0, XMM0, XMM0; vpcmpgtw XMM0, XMM0, XMM0; vpcmpgtd XMM0, XMM0, XMM0; vpcmpgtq XMM0, XMM0, XMM0; vpcmpestri XMM0, XMM0, 0; vpcmpestrm XMM0, XMM0, 0; vpcmpistri XMM0, XMM0, 0; vpcmpistrm XMM0, XMM0, 0; vcvtdq2pd XMM0, XMM0; vcvtdq2pd YMM0, XMM0; vcvtdq2pd YMM0, [RAX]; vcvtdq2ps XMM0, XMM0; vcvtdq2ps YMM0, YMM0; vcvtdq2ps YMM0, [RAX]; vcvtpd2dq XMM0, XMM0; vcvtpd2dq XMM0, YMM0; vcvtpd2dq XMM0, [RAX]; vcvtpd2ps XMM0, XMM0; vcvtpd2ps XMM0, YMM0; vcvtpd2ps XMM0, [RAX]; vcvtps2dq XMM0, XMM0; vcvtps2dq YMM0, YMM0; 
vcvtps2dq YMM0, [RAX]; vcvtps2pd XMM0, XMM0; vcvtps2pd YMM0, XMM0; vcvtps2pd YMM0, [RAX]; vcvtsd2si EAX, XMM0; vcvtsd2si RAX, XMM0; vcvtsd2si RAX, [RAX]; vcvtsd2ss XMM0, XMM0, XMM0; vcvtsd2ss XMM0, XMM0, [RAX]; vcvtsi2sd XMM0, XMM0, EAX; vcvtsi2sd XMM0, XMM0, RAX; vcvtsi2sd XMM0, XMM0, [RAX]; vcvtsi2ss XMM0, XMM0, EAX; vcvtsi2ss XMM0, XMM0, RAX; vcvtsi2ss XMM0, XMM0, [RAX]; vcvtsi2sd XMM0, XMM0, EAX; vcvtsi2sd XMM0, XMM0, RAX; vcvtsi2sd XMM0, XMM0, [RAX]; vcvtss2si EAX, XMM0; vcvtss2si RAX, XMM0; vcvtss2si RAX, [RAX]; vcvttpd2dq XMM0, XMM0; vcvttpd2dq XMM0, YMM0; vcvttpd2dq XMM0, [RAX]; vcvttps2dq XMM0, XMM0; vcvttps2dq YMM0, YMM0; vcvttps2dq YMM0, [RAX]; vcvttsd2si EAX, XMM0; vcvttsd2si RAX, XMM0; vcvttsd2si RAX, [RAX]; vcvttss2si EAX, XMM0; vcvttss2si RAX, XMM0; vcvttss2si RAX, [RAX]; vbroadcastss XMM0, [RAX]; vbroadcastss YMM0, [RAX]; vbroadcastsd YMM0, [RAX]; vbroadcastf128 YMM0, [RAX]; vextractf128 XMM0, YMM0, 0; vextractf128 [RAX], YMM0, 0; vextractps EAX, XMM0, 0; vextractps [RAX], XMM0, 0; vinsertf128 YMM0, YMM0, XMM0, 0; vinsertf128 YMM0, YMM0, [RAX], 0; vinsertps XMM0, XMM0, XMM0, 0; vinsertps XMM0, XMM0, [RAX], 0; vpinsrb XMM0, XMM0, EAX, 0; vpinsrb XMM0, XMM0, [RAX], 0; vpinsrw XMM0, XMM0, EAX, 0; vpinsrw XMM0, XMM0, [RAX], 0; vpinsrd XMM0, XMM0, EAX, 0; vpinsrd XMM0, XMM0, [RAX], 0; vpinsrq XMM0, XMM0, RAX, 0; vpinsrq XMM0, XMM0, [RAX], 0; vlddqu XMM0, [RAX]; vlddqu YMM0, [RAX]; vmaskmovdqu XMM0, XMM0; vmaskmovps XMM0, XMM0, [RAX]; vmaskmovps [RAX], XMM0, XMM0; vmaskmovps YMM0, YMM0, [RAX]; vmaskmovps [RAX], YMM0, YMM0; vmaskmovpd XMM0, XMM0, [RAX]; vmaskmovpd [RAX], XMM0, XMM0; vmaskmovpd YMM0, YMM0, [RAX]; vmaskmovpd [RAX], YMM0, YMM0; vmovapd YMM0, [RAX]; vmovapd YMM8, [RAX]; vmovapd YMM8, 64[RDI]; vmovapd [RAX], YMM0; vmovapd [RAX], YMM8; vmovapd 64[RDI], YMM8; vmovaps YMM0, [RAX]; vmovaps YMM8, [RAX]; vmovaps YMM8, 64[RDI]; vmovaps [RAX], YMM0; vmovaps [RAX], YMM8; vmovaps 64[RDI], YMM8; vmovupd YMM0, [RAX]; vmovupd YMM8, [RAX]; vmovupd YMM8, 
64[RDI]; vmovupd [RAX], YMM0; vmovupd [RAX], YMM8; vmovupd 64[RDI], YMM8; vmovups YMM0, [RAX]; vmovups YMM8, [RAX]; vmovups YMM8, 64[RDI]; vmovups [RAX], YMM0; vmovups [RAX], YMM8; vmovups 64[RDI], YMM8; vmovd XMM0, EAX; vmovd XMM0, [RAX]; vmovd EAX, XMM0; vmovd [RAX], XMM0; vmovq XMM0, RAX; vmovq XMM0, [RAX]; vmovq RAX, XMM0; vmovq [RAX], XMM0; vmovdqa XMM0, XMM0; vmovdqa XMM0, [RAX]; vmovdqa YMM0, YMM0; vmovdqa YMM0, [RAX]; vmovdqa XMM0, XMM0; vmovdqa [RAX], XMM0; vmovdqa YMM0, YMM0; vmovdqa [RAX],YMM0; vmovdqu XMM0, XMM0; vmovdqu XMM0, [RAX]; vmovdqu YMM0, YMM0; vmovdqu YMM0, [RAX]; vmovdqu XMM0, XMM0; vmovdqu [RAX], XMM0; vmovdqu YMM0, YMM0; vmovdqu [RAX],YMM0; vmovhlps XMM0, XMM0, XMM0; vmovlhps XMM0, XMM0, XMM0; vmovhpd XMM0, XMM0, [RAX]; vmovhpd [RAX], XMM0; vmovhps XMM0, XMM0, [RAX]; vmovhps [RAX], XMM0; vmovlpd XMM0, XMM0, [RAX]; vmovlpd [RAX], XMM0; vmovlps XMM0, XMM0, [RAX]; vmovlps [RAX], XMM0; vmovntdq [RAX], XMM0; vmovntdq [RAX], YMM8; vmovntpd [RAX], XMM0; vmovntpd [RAX], YMM8; vmovntps [RAX], XMM0; vmovntps [RAX], YMM8; vmovntdqa XMM0, [RAX]; vmovsd XMM0, XMM0, XMM0; vmovsd XMM8, XMM8, XMM8; vmovsd [RAX], XMM0; vmovsd [R8], XMM8; vmovss XMM0, XMM0, XMM0; vmovss XMM8, XMM8, XMM8; vmovss [RAX], XMM0; vmovss [R8], XMM8; vmovshdup XMM8, XMM1; vmovshdup YMM0, YMM8; vmovshdup YMM0, [RAX]; vmovsldup XMM8, XMM1; vmovsldup YMM0, YMM8; vmovsldup YMM0, [RAX]; vpackuswb XMM0, XMM1, XMM2; vpackuswb XMM0, XMM8, [RAX]; vpackusdw XMM0, XMM1, XMM2; vpackusdw XMM0, XMM8, [RAX]; vpacksswb XMM0, XMM1, XMM2; vpacksswb XMM0, XMM8, [RAX]; vpackssdw XMM0, XMM1, XMM2; vpackssdw XMM0, XMM8, [RAX]; vpalignr XMM0, XMM1, XMM2, 0xFF; vpalignr XMM9, XMM8, [RAX], 0x10; vpextrb EAX, XMM0, 0x0; vpextrb R10, XMM9, 0xF; vpextrb [R10], XMM9, 0xF; vpextrd EAX, XMM0, 0x0; vpextrd R8D, XMM9, 0xF; vpextrd [R10], XMM9, 0xF; vpextrq RAX, XMM0, 0x0; vpextrq R10, XMM9, 0xF; vpextrq [R10], XMM9, 0xF; vpextrw ECX, XMM2, 0x3; vpextrw EAX, XMM0, 0x0; vpextrw R10, XMM9, 0xF; vpextrw [R10], XMM9, 
0xF; vpmovsxbw XMM0, XMM0; vpmovsxbw XMM8, [R8]; vpmovsxbd XMM0, XMM0; vpmovsxbd XMM8, [R8]; vpmovsxbq XMM0, XMM0; vpmovsxbq XMM8, [R8]; vpmovsxwd XMM0, XMM0; vpmovsxwd XMM8, [R8]; vpmovsxwq XMM0, XMM0; vpmovsxwq XMM8, [R8]; vpmovsxdq XMM0, XMM0; vpmovsxdq XMM8, [R8]; vpmovzxbw XMM0, XMM0; vpmovzxbw XMM8, [R8]; vpmovzxbd XMM0, XMM0; vpmovzxbd XMM8, [R8]; vpmovzxbq XMM0, XMM0; vpmovzxbq XMM8, [R8]; vpmovzxwd XMM0, XMM0; vpmovzxwd XMM8, [R8]; vpmovzxwq XMM0, XMM0; vpmovzxwq XMM8, [R8]; vpmovzxdq XMM0, XMM0; vpmovzxdq XMM8, [R8]; vandpd XMM0, XMM0, XMM0; vandpd XMM9, XMM8, [R8]; vandps XMM0, XMM0, XMM0; vandps XMM9, XMM8, [R8]; vandnpd XMM0, XMM0, XMM0; vandnpd XMM9, XMM8, [R8]; vandnps XMM0, XMM0, XMM0; vandnps XMM9, XMM8, [R8]; vorpd XMM0, XMM0, XMM0; vorpd XMM9, XMM8, [R8]; vorps XMM0, XMM0, XMM0; vorps XMM9, XMM8, [R8]; vpand XMM0, XMM0, XMM0; vpand XMM9, XMM8, [R8]; vpandn XMM0, XMM0, XMM0; vpandn XMM9, XMM8, [R8]; vpor XMM0, XMM0, XMM0; vpor XMM9, XMM8, [R10]; vpxor XMM0, XMM0, XMM0; vpxor XMM9, XMM8, [R10]; vptest XMM0, XMM0; vptest XMM8, [RAX]; vptest YMM8, YMM8; vptest YMM0, [R8]; vucomisd XMM0, XMM0; vucomisd XMM8, [RAX]; vucomiss XMM0, XMM0; vucomiss XMM8, [RAX]; vxorpd XMM0, XMM8, XMM0; vxorpd XMM8, XMM0, [RAX]; vxorpd YMM0, YMM8, YMM0; vxorpd YMM8, YMM0, [RAX]; vxorps XMM0, XMM8, XMM0; vxorps XMM8, XMM0, [RAX]; vxorps YMM0, YMM8, YMM0; vxorps YMM8, YMM0, [RAX]; vblendpd XMM0, XMM1, XMM2, 0x00; vblendpd XMM9, XMM8, [RAX], 0xFF; vblendpd YMM0, YMM1, YMM2, 0x00; vblendpd YMM9, YMM8, [RAX], 0xFF; vblendps XMM0, XMM1, XMM2, 0x00; vblendps XMM9, XMM8, [RAX], 0xFF; vblendps YMM0, YMM1, YMM2, 0x00; vblendps YMM9, YMM8, [RAX], 0xFF; vblendvpd XMM0, XMM1, XMM2, 0x00; vblendvpd XMM9, XMM8, [RAX], 0xFF; vblendvpd YMM0, YMM1, YMM2, 0x00; vblendvpd YMM9, YMM8, [RAX], 0xFF; vblendvps XMM0, XMM1, XMM2, 0x00; vblendvps XMM9, XMM8, [RAX], 0xFF; vblendvps YMM0, YMM1, YMM2, 0x00; vblendvps YMM9, YMM8, [RAX], 0xFF; vmovddup XMM8, XMM0; vmovddup XMM0, [RAX]; vmovddup YMM0, 
YMM8; vmovddup YMM0, [R10]; vpblendvb XMM0, XMM8, XMM0, 0x00; vpblendvb XMM8, XMM0, [RAX], 0x00; vpblendvb XMM8, XMM0, [R10], 0x00; vpblendw XMM0, XMM8, XMM0, 0x00; vpblendw XMM8, XMM0, [RAX], 0x00; vpblendw XMM8, XMM0, [R10], 0x00; vpermilpd XMM0, XMM1, XMM2; vpermilpd XMM0, XMM1, [RAX]; vpermilpd XMM0, XMM1, 0x00; vpermilpd XMM0, [RAX], 0x00; vpermilps XMM0, XMM1, XMM2; vpermilps XMM0, XMM1, [RAX]; vpermilps XMM0, XMM1, 0x00; vpermilps XMM0, [RAX], 0x00; vperm2f128 YMM0, YMM1, YMM2, 0x00; vperm2f128 YMM0, YMM1, [RAX], 0x00; vperm2f128 YMM8, YMM9, [R10], 0x00; vpshufb XMM0, XMM0, XMM0; vpshufb XMM9, XMM8, [R8]; vpshufd XMM0, XMM0, 0x0; vpshufd XMM8, [R8], 0x0; vpshufhw XMM0, XMM0, 0x0; vpshufhw XMM8, [R8], 0x0; vpshuflw XMM0, XMM0, 0x0; vpshuflw XMM8, [R8], 0x0; vpunpckhbw XMM0, XMM1, XMM2; vpunpckhbw XMM8, XMM8, [R8]; vpunpckhwd XMM0, XMM1, XMM2; vpunpckhwd XMM8, XMM8, [R8]; vpunpckhdq XMM0, XMM1, XMM2; vpunpckhdq XMM8, XMM8, [R8]; vpunpckhqdq XMM0, XMM1, XMM2; vpunpckhqdq XMM8, XMM8, [R8]; vpunpcklbw XMM0, XMM1, XMM2; vpunpcklbw XMM8, XMM8, [R8]; vpunpcklwd XMM0, XMM1, XMM2; vpunpcklwd XMM8, XMM8, [R8]; vpunpckldq XMM0, XMM1, XMM2; vpunpckldq XMM8, XMM8, [R8]; vpunpcklqdq XMM0, XMM1, XMM2; vpunpcklqdq XMM8, XMM8, [R8]; vshufpd XMM0, XMM0, XMM0, 0x00; vshufpd XMM0, XMM8, [R8], 0x00; vshufpd YMM8, YMM0, YMM8, 0x00; vshufpd YMM8, YMM0, [RAX], 0x00; vshufps XMM0, XMM0, XMM0, 0x00; vshufps XMM0, XMM8, [R8], 0x00; vshufps YMM8, YMM0, YMM8, 0x00; vshufps YMM8, YMM0, [RAX], 0x00; vunpckhpd XMM0, XMM0, XMM0; vunpckhpd XMM8, XMM8, [RAX]; vunpckhpd YMM8, YMM0, [R8]; vunpckhpd YMM0, YMM8, [R8]; vunpckhps XMM0, XMM0, XMM0; vunpckhps XMM8, XMM8, [RAX]; vunpckhps YMM8, YMM0, [R8]; vunpckhps YMM0, YMM8, [R8]; vunpcklpd XMM0, XMM0, XMM0; vunpcklpd XMM8, XMM8, [RAX]; vunpcklpd YMM8, YMM0, [R8]; vunpcklpd YMM0, YMM8, [R8]; vunpcklps XMM0, XMM0, XMM0; vunpcklps XMM8, XMM8, [RAX]; vunpcklps YMM8, YMM0, [R8]; vunpcklps YMM0, YMM8, [R8]; /* AES */ aesenc XMM0, XMM0; aesenc XMM0, 
[RAX]; vaesenc XMM0, XMM0, XMM0; vaesenc XMM0, XMM0, [RAX]; aesenclast XMM0, XMM0; aesenclast XMM0, [RAX]; vaesenclast XMM0, XMM0, XMM0; vaesenclast XMM0, XMM0, [RAX]; aesdec XMM0, XMM0; aesdec XMM0, [RAX]; vaesdec XMM0, XMM0, XMM0; vaesdec XMM0, XMM0, [RAX]; aesdeclast XMM0, XMM0; aesdeclast XMM0, [RAX]; vaesdeclast XMM0, XMM0, XMM0; vaesdeclast XMM0, XMM0, [RAX]; aesimc XMM0, XMM0; aesimc XMM0, [RAX]; vaesimc XMM0, XMM0; vaesimc XMM0, [RAX]; aeskeygenassist XMM0, XMM0, 0x0; aeskeygenassist XMM0, [RAX], 0x0; vaeskeygenassist XMM0, XMM0, 0x0; vaeskeygenassist XMM0, [RAX], 0x0; /* FSGSBASE */ rdfsbase RAX; rdfsbase R15; rdgsbase RAX; rdgsbase R15; wrfsbase RAX; wrfsbase R15; wrgsbase RAX; wrgsbase R15; /* RDRAND */ rdrand AX; rdrand EAX; rdrand RAX; rdrand R15W; rdrand R15D; rdrand R15; /* RDSEED */ rdseed AX; rdseed EAX; rdseed RAX; rdseed R15W; rdseed R15D; rdseed R15; /* FP16C */ vcvtph2ps XMM0, XMM0; vcvtph2ps XMM0, [RAX]; vcvtph2ps YMM0, XMM0; vcvtph2ps YMM8, [R8]; vcvtps2ph XMM0, XMM0, 0x0; vcvtps2ph [RAX], XMM0, 0x0; vcvtps2ph XMM0, YMM0, 0x0; vcvtps2ph [R8], YMM8, 0x0; /* FMA */ vfmadd132pd XMM0, XMM0, XMM0; vfmadd132pd XMM0, XMM0, [RAX]; vfmadd132pd YMM0, YMM0, YMM0; vfmadd132pd YMM8, YMM8, [R8]; vfmadd132ps XMM0, XMM0, XMM0; vfmadd132ps XMM0, XMM0, [RAX]; vfmadd132ps YMM0, YMM0, YMM0; vfmadd132ps YMM8, YMM8, [R8]; vfmadd132sd XMM0, XMM0, XMM0; vfmadd132sd XMM0, XMM0, [RAX]; vfmadd132ss XMM0, XMM0, XMM0; vfmadd132ss XMM0, XMM0, [RAX]; vfmadd213pd XMM0, XMM0, XMM0; vfmadd213pd XMM0, XMM0, [RAX]; vfmadd213pd YMM0, YMM0, YMM0; vfmadd213pd YMM8, YMM8, [R8]; vfmadd213ps XMM0, XMM0, XMM0; vfmadd213ps XMM0, XMM0, [RAX]; vfmadd213ps YMM0, YMM0, YMM0; vfmadd213ps YMM8, YMM8, [R8]; vfmadd213sd XMM0, XMM0, XMM0; vfmadd213sd XMM0, XMM0, [RAX]; vfmadd213ss XMM0, XMM0, XMM0; vfmadd213ss XMM0, XMM0, [RAX]; vfmadd231pd XMM0, XMM0, XMM0; vfmadd231pd XMM0, XMM0, [RAX]; vfmadd231pd YMM0, YMM0, YMM0; vfmadd231pd YMM8, YMM8, [R8]; vfmadd231ps XMM0, XMM0, XMM0; vfmadd231ps XMM0, 
XMM0, [RAX]; vfmadd231ps YMM0, YMM0, YMM0; vfmadd231ps YMM8, YMM8, [R8]; vfmadd231sd XMM0, XMM0, XMM0; vfmadd231sd XMM0, XMM0, [RAX]; vfmadd231ss XMM0, XMM0, XMM0; vfmadd231ss XMM0, XMM0, [RAX]; vfmaddsub132pd XMM0, XMM0, XMM0; vfmaddsub132pd XMM0, XMM0, [RAX]; vfmaddsub132pd YMM0, YMM0, YMM0; vfmaddsub132pd YMM8, YMM8, [R8]; vfmaddsub132ps XMM0, XMM0, XMM0; vfmaddsub132ps XMM0, XMM0, [RAX]; vfmaddsub132ps YMM0, YMM0, YMM0; vfmaddsub132ps YMM8, YMM8, [R8]; vfmaddsub213pd XMM0, XMM0, XMM0; vfmaddsub213pd XMM0, XMM0, [RAX]; vfmaddsub213pd YMM0, YMM0, YMM0; vfmaddsub213pd YMM8, YMM8, [R8]; vfmaddsub213ps XMM0, XMM0, XMM0; vfmaddsub213ps XMM0, XMM0, [RAX]; vfmaddsub213ps YMM0, YMM0, YMM0; vfmaddsub213ps YMM8, YMM8, [R8]; vfmaddsub231pd XMM0, XMM0, XMM0; vfmaddsub231pd XMM0, XMM0, [RAX]; vfmaddsub231pd YMM0, YMM0, YMM0; vfmaddsub231pd YMM8, YMM8, [R8]; vfmaddsub231ps XMM0, XMM0, XMM0; vfmaddsub231ps XMM0, XMM0, [RAX]; vfmaddsub231ps YMM0, YMM0, YMM0; vfmaddsub231ps YMM8, YMM8, [R8]; vfmsubadd132pd XMM0, XMM0, XMM0; vfmsubadd132pd XMM0, XMM0, [RAX]; vfmsubadd132pd YMM0, YMM0, YMM0; vfmsubadd132pd YMM8, YMM8, [R8]; vfmsubadd132ps XMM0, XMM0, XMM0; vfmsubadd132ps XMM0, XMM0, [RAX]; vfmsubadd132ps YMM0, YMM0, YMM0; vfmsubadd132ps YMM8, YMM8, [R8]; vfmsubadd213pd XMM0, XMM0, XMM0; vfmsubadd213pd XMM0, XMM0, [RAX]; vfmsubadd213pd YMM0, YMM0, YMM0; vfmsubadd213pd YMM8, YMM8, [R8]; vfmsubadd213ps XMM0, XMM0, XMM0; vfmsubadd213ps XMM0, XMM0, [RAX]; vfmsubadd213ps YMM0, YMM0, YMM0; vfmsubadd213ps YMM8, YMM8, [R8]; vfmsubadd231pd XMM0, XMM0, XMM0; vfmsubadd231pd XMM0, XMM0, [RAX]; vfmsubadd231pd YMM0, YMM0, YMM0; vfmsubadd231pd YMM8, YMM8, [R8]; vfmsubadd231ps XMM0, XMM0, XMM0; vfmsubadd231ps XMM0, XMM0, [RAX]; vfmsubadd231ps YMM0, YMM0, YMM0; vfmsubadd231ps YMM8, YMM8, [R8]; vfmsub132pd XMM0, XMM0, XMM0; vfmsub132pd XMM0, XMM0, [RAX]; vfmsub132pd YMM0, YMM0, YMM0; vfmsub132pd YMM8, YMM8, [R8]; vfmsub132ps XMM0, XMM0, XMM0; vfmsub132ps XMM0, XMM0, [RAX]; vfmsub132ps YMM0, YMM0, 
YMM0; vfmsub132ps YMM8, YMM8, [R8]; vfmsub132sd XMM0, XMM0, XMM0; vfmsub132sd XMM0, XMM0, [RAX]; vfmsub132ss XMM0, XMM0, XMM0; vfmsub132ss XMM0, XMM0, [RAX]; vfmsub213pd XMM0, XMM0, XMM0; vfmsub213pd XMM0, XMM0, [RAX]; vfmsub213pd YMM0, YMM0, YMM0; vfmsub213pd YMM8, YMM8, [R8]; vfmsub213ps XMM0, XMM0, XMM0; vfmsub213ps XMM0, XMM0, [RAX]; vfmsub213ps YMM0, YMM0, YMM0; vfmsub213ps YMM8, YMM8, [R8]; vfmsub213sd XMM0, XMM0, XMM0; vfmsub213sd XMM0, XMM0, [RAX]; vfmsub213ss XMM0, XMM0, XMM0; vfmsub213ss XMM0, XMM0, [RAX]; vfmsub231pd XMM0, XMM0, XMM0; vfmsub231pd XMM0, XMM0, [RAX]; vfmsub231pd YMM0, YMM0, YMM0; vfmsub231pd YMM8, YMM8, [R8]; vfmsub231ps XMM0, XMM0, XMM0; vfmsub231ps XMM0, XMM0, [RAX]; vfmsub231ps YMM0, YMM0, YMM0; vfmsub231ps YMM8, YMM8, [R8]; vfmsub231sd XMM0, XMM0, XMM0; vfmsub231sd XMM0, XMM0, [RAX]; vfmsub231ss XMM0, XMM0, XMM0; vfmsub231ss XMM0, XMM0, [RAX]; L1: pop RAX; mov p[RBP],RAX; } foreach (ref i, b; data) { //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b); assert(p[i] == b); } assert(p[data.length] == 0x58); // pop RAX } /* ======================= SHA ========================== */ void test62() { ubyte* p; byte m8; short m16; int m32; M64 m64; M128 m128; static ubyte[] data = [ 0x0F, 0x3A, 0xCC, 0xD1, 0x01, // sha1rnds4 XMM2, XMM1, 1; 0x0F, 0x3A, 0xCC, 0x10, 0x01, // sha1rnds4 XMM2, [RAX], 1; 0x0F, 0x38, 0xC8, 0xD1, // sha1nexte XMM2, XMM1; 0x0F, 0x38, 0xC8, 0x10, // sha1nexte XMM2, [RAX]; 0x0F, 0x38, 0xC9, 0xD1, // sha1msg1 XMM2, XMM1; 0x0F, 0x38, 0xC9, 0x10, // sha1msg1 XMM2, [RAX]; 0x0F, 0x38, 0xCA, 0xD1, // sha1msg2 XMM2, XMM1; 0x0F, 0x38, 0xCA, 0x10, // sha1msg2 XMM2, [RAX]; 0x0F, 0x38, 0xCB, 0xD1, // sha256rnds2 XMM2, XMM1; 0x0F, 0x38, 0xCB, 0x10, // sha256rnds2 XMM2, [RAX]; 0x0F, 0x38, 0xCC, 0xD1, // sha256msg1 XMM2, XMM1; 0x0F, 0x38, 0xCC, 0x10, // sha256msg1 XMM2, [RAX]; 0x0F, 0x38, 0xCD, 0xD1, // sha256msg2 XMM2, XMM1; 0x0F, 0x38, 0xCD, 0x10, // sha256msg2 XMM2, [RAX]; ]; asm { call L1; sha1rnds4 XMM2, XMM1, 1; 
sha1rnds4 XMM2, [RAX], 1;
        sha1nexte XMM2, XMM1;
        sha1nexte XMM2, [RAX];
        sha1msg1 XMM2, XMM1;
        sha1msg1 XMM2, [RAX];
        sha1msg2 XMM2, XMM1;
        sha1msg2 XMM2, [RAX];
        sha256rnds2 XMM2, XMM1;
        sha256rnds2 XMM2, [RAX];
        sha256msg1 XMM2, XMM1;
        sha256msg1 XMM2, [RAX];
        sha256msg2 XMM2, XMM1;
        sha256msg2 XMM2, [RAX];

        // The `call L1` at the top of this asm block pushed the address of the
        // first SHA instruction; pop it and keep it in p so the emitted bytes
        // can be inspected below. The instructions themselves never execute.
L1:     pop RAX;
        mov p[RBP],RAX;
    }

    // Every emitted byte must match the expected encoding table.
    foreach (ref i, b; data)
    {
        //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b);
        assert(p[i] == b);
    }
}

// Checks that `fstsw AX` is encoded as the three bytes 9B DF E0.
// The instruction is jumped over by `call L1`; only its encoding is read back.
void test2941()
{
    ubyte *p;
    static ubyte[] data =
    [
        0x9B, 0xDF, 0xE0,       // fstsw AX;
    ];
    int i;

    asm
    {
        call L1 ;               // pushes the address of the next instruction
        fstsw AX;
L1:     pop RBX ;               // RBX = address of the encoded `fstsw AX`
        mov p[RBP],RBX ;
    }

    // Byte-for-byte comparison of the emitted code against the table.
    for (i = 0; i < data.length; i++)
    {
        assert(p[i] == data[i]);
    }
}

// Checks the encodings of 64-bit `movsx` (byte and word sources) and `movsxd`
// (dword source), each in register and memory-operand form.
void test9866()
{
    ubyte* p;
    static ubyte[] data =
    [
        0x48, 0x0f, 0xbe, 0xc0, // movsx RAX, AL
        0x48, 0x0f, 0xbe, 0x00, // movsx RAX, byte ptr [RAX]
        0x48, 0x0f, 0xbf, 0xc0, // movsx RAX, AX
        0x48, 0x0f, 0xbf, 0x00, // movsx RAX, word ptr [RAX]
        0x48, 0x63, 0xc0,       // movsxd RAX, EAX
        0x48, 0x63, 0x00,       // movsxd RAX, dword ptr [RAX]
    ];

    asm
    {
        call L1;                // pushes the address of the first movsx

        movsx RAX, AL;
        movsx RAX, byte ptr [RAX];
        movsx RAX, AX;
        movsx RAX, word ptr [RAX];
        movsxd RAX, EAX;
        movsxd RAX, dword ptr [RAX];

L1:     pop RAX;                // RAX = start of the encoded sequence
        mov p[RBP],RAX;
    }

    foreach (ref i, b; data)
    {
        // printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b);
        assert(p[i] == b);
    }

    // The byte right after the table must be the `pop RAX` at L1, i.e. the
    // last instruction did not emit any extra bytes.
    assert(p[data.length] == 0x58); // pop RAX
}

/****************************************************/

// Checks the encodings of `xadd` with a memory destination for byte, word,
// dword and qword operand sizes.
void testxadd()
{
    int x;
    ubyte* p;
    static ubyte[] data =
    [
        0x0F, 0xC0, 0x10,       // xadd byte ptr [RAX],DL
        0x66, 0x0F, 0xC1, 0x10, // xadd word ptr [RAX],DX
        0x0F, 0xC1, 0x10,       // xadd dword ptr [RAX],EDX
        0x48, 0x0F, 0xC1, 0x10, // xadd qword ptr [RAX],RDX
    ];

    asm
    {
        call L1 ;               // pushes the address of the first xadd

        xadd byte ptr [RAX],DL;
        xadd word ptr [RAX],DX;
        xadd dword ptr [RAX],EDX;
        xadd qword ptr [RAX],RDX;

L1:     pop RAX;                // RAX = start of the encoded sequence
        mov p[RBP],RAX;
    }

    foreach (ref i, b; data)
    {
        //printf("data[%d] = 0x%02x, should be 0x%02x\n", i, p[i], b);
        assert(p[i] == b);
    }
}

/****************************************************/

// Executes a `mov RAX, imm` with the unsigned constant 0xFFFF_FFFF and checks
// that the full 64-bit register holds exactly that value afterwards.
// NOTE(review): presumably guards against the immediate being sign-extended
// to 64 bits - confirm against the originating bug report.
void testconst()
{
    ulong result;

    asm
    {
        mov RAX, 0xFFFF_FFFFu;
        mov result, RAX;
    }

    assert (result == 0xFFFF_FFFFu);
}
/****************************************************/

/*
 * Checks the encodings of byte-register moves, including the registers
 * that need a REX prefix (SIL, DIL, BPL, SPL, R8B), plus `and SIL,1`.
 *
 * `call L1` pushes the address of the byte following the call - the start
 * of the instructions under test - and the `pop` at L1 stores it in `p`.
 * The instructions are skipped, never executed.
 */
void test9965()
{
    ubyte* p;
    static ubyte[] data =
    [
        0xB7, 0x01,             // mov BH,1
        0x40, 0xB6, 0x01,       // mov SIL,1
        0x40, 0xB7, 0x01,       // mov DIL,1
        0x40, 0xB5, 0x01,       // mov BPL,1
        0x40, 0xB4, 0x01,       // mov SPL,1
        0x41, 0xB0, 0x01,       // mov R8B,1
        0x40, 0x80, 0xE6, 0x01, // and SIL,1 (https://issues.dlang.org/show_bug.cgi?id=12971)
    ];

    asm
    {
        call L1;

        mov BH, 1;
        mov SIL, 1;
        mov DIL, 1;
        mov BPL, 1;
        mov SPL, 1;
        mov R8B, 1;
        and SIL, 1;

L1:     pop     RAX;
        mov     p[RBP],RAX;
    }

    for (size_t idx = 0; idx < data.length; ++idx)
        assert(p[idx] == data[idx]);

    // Nothing extra may be emitted before the `pop RAX` at L1.
    assert(p[data.length] == 0x58); // pop RAX
}

/****************************************************/

/*
 * Executes `pmovmskb` with a 64-bit destination register.
 *
 * `a` has 0xff in every odd-numbered byte, so the mask built from the top
 * bit of each byte of XMM0 (upper half cleared by pxor/movq) is 0b10101010.
 */
void test12849()
{
    ulong a = 0xff00ff00ff00ff00L;
    ulong result;
    ulong wantMask = 0b10101010;

    asm
    {
        pxor XMM0, XMM0;
        movq XMM0, a;
        pmovmskb RAX, XMM0;
        mov result, RAX;
    }

    assert (result == wantMask);
}

/****************************************************/

/*
 * Checks the encodings of a 64-bit register move, an `xchg` involving an
 * extended register, and `ret`.
 */
void test12968()
{
    int x;
    ubyte* p;
    static ubyte[] data =
    [
        0x48, 0x89, 0xF8,       // mov RAX, RDI
        0x4C, 0x87, 0xC2,       // xchg RDX, R8
        0xC3                    // ret
    ];

    asm
    {
        call    L1                      ;

        mov RAX, RDI;
        xchg RDX, R8;
        ret;

L1:     pop     RAX;
        mov     p[RBP],RAX;
    }

    foreach (idx, expected; data)
        assert(p[idx] == expected);
}

/****************************************************/

/*
 * Checks how `and` with negative / sign-extendable immediates is encoded
 * for 64-bit and 32-bit accumulators.
 */
void test15999()
{
    int x;
    ubyte* p;
    static ubyte[] data =
    [
        0x48, 0x83, 0xE0, 0xFF,                 // and RAX, -1
        0x83, 0xE0, 0xFF,                       // and EAX, -1
        0x48, 0x25, 0x00, 0x00, 0x00, 0x80,     // and RAX, 0xFFFF_FFFF_8000_0000
        0xC3                                    // ret
    ];

    asm
    {
        call    L1                      ;

        and RAX, -1;
        and EAX, -1;
        and RAX, 0xFFFF_FFFF_8000_0000;
        ret;

L1:     pop     RAX;
        mov     p[RBP],RAX;
    }

    foreach (idx, expected; data)
        assert(p[idx] == expected);
}

/****************************************************/

/*
 * Checks the encodings of `tzcnt` and `lzcnt` for 64-bit register, memory,
 * memory-with-displacement and 32-bit register operands.
 */
void test17027()
{
    ubyte* p;
    static ubyte[] data =
    [
        0xF3, 0x48, 0x0F, 0xBC, 0xC0,           // tzcnt RAX, RAX
        0xF3, 0x48, 0x0F, 0xBC, 0x00,           // tzcnt RAX, [RAX]
        0xF3, 0x48, 0x0F, 0xBC, 0x40, 0x08,     // tzcnt RAX, [RAX+8]
        0xF3, 0x0F, 0xBC, 0xC0,                 // tzcnt EAX, EAX
        0xF3, 0x48, 0x0F, 0xBD, 0xC0,           // lzcnt RAX, RAX
        0xF3, 0x48, 0x0F, 0xBD, 0x00,           // lzcnt RAX, [RAX]
        0xF3, 0x48, 0x0F, 0xBD, 0x40, 0x08,     // lzcnt RAX, [RAX+8]
        0xF3, 0x0F, 0xBD, 0xC0,                 // lzcnt EAX, EAX
    ];

    asm
    {
        call L1;

        tzcnt RAX, RAX;
        tzcnt RAX, [RAX];
        tzcnt RAX, [RAX+8];
        tzcnt EAX, EAX;
        lzcnt RAX, RAX;
        lzcnt RAX, [RAX];
        lzcnt RAX, [RAX+8];
        lzcnt EAX, EAX;

L1:     pop     RAX;
        mov     p[RBP],RAX;
    }

    foreach (idx, expected; data)
        assert(p[idx] == expected);
}

/****************************************************/

/*
 * Checks the encodings of moves to and from the control registers
 * CR0, CR2, CR3, CR4 and CR8.  The moves are only assembled, not run.
 */
void test18553()
{
    ubyte* p;
    static ubyte[] data =
    [
        0x0F, 0x20, 0xC0,       // mov RAX, CR0
        0x0F, 0x20, 0xD0,       // mov RAX, CR2
        0x0F, 0x20, 0xD8,       // mov RAX, CR3
        0x0F, 0x20, 0xE0,       // mov RAX, CR4

        0x0F, 0x22, 0xC0,       // mov CR0, RAX
        0x0F, 0x22, 0xD0,       // mov CR2, RAX
        0x0F, 0x22, 0xD8,       // mov CR3, RAX
        0x0F, 0x22, 0xE0,       // mov CR4, RAX

        0x44, 0x0F, 0x22, 0xC0, // mov CR8, RAX
        0x44, 0x0F, 0x20, 0xC0, // mov RAX, CR8
    ];

    asm
    {
        call L1;

        mov RAX, CR0;
        mov RAX, CR2;
        mov RAX, CR3;
        mov RAX, CR4;

        mov CR0, RAX;
        mov CR2, RAX;
        mov CR3, RAX;
        mov CR4, RAX;

        mov CR8, RAX;
        mov RAX, CR8;

L1:     pop     RAX;
        mov     p[RBP],RAX;
    }

    for (size_t idx = 0; idx < data.length; ++idx)
        assert(p[idx] == data[idx]);
}

/****************************************************/

// https://issues.dlang.org/show_bug.cgi?id=20126

/*
 * Swaps the 32-bit value stored at `r` with the parameter `x` through EAX
 * and returns the updated `x`, i.e. the value that `*r` held on entry.
 */
extern(C) float floop(float* r, float x)
{
    asm
    {
        mov  EAX, x;
        mov  RCX, r;
        xchg [RCX], EAX;
        mov  x, EAX;
    }
    return x;
}

/*
 * Runs floop() and checks that it hands back the value previously stored
 * behind the pointer.
 */
void test20126()
{
    float slot = 1.0;
    float incoming = 2.0;
    float previous = floop(&slot, incoming);
    assert(previous == 1.0);
}

/****************************************************/

/*
 * Exercises a backward jump to a label in a preceding asm statement when
 * the target is more than 128 bytes away (16 rows of 8 padding bytes lie
 * in between).  Control enters at L1, jumps forward to L2 and leaves; the
 * `jmp L1` after the padding is assembled but not reached.
 */
void test63()
{
    asm
    {
L1:
        mov EAX,0;
        jmp L2;

        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;

        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;

        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;

        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;
        db 0,0,0,0,0,0,0,0 ;
    }

    asm
    {
        jmp L1;  // more than 128 bytes away
    }

L2:
    {
    }
}

/****************************************************/

// https://issues.dlang.org/show_bug.cgi?id=6166

/* Struct returned by value from rvo(). */
struct NRV
{
    int[8] a;
}

/*
 * Builds an NRV, sets a[1] to 3 from D code, then overwrites the int at
 * byte offset 4 of `v` (the same element) with 7 from inline asm, and
 * returns the struct by value.
 */
NRV rvo()
{
    NRV v;
    v.a[1] = 3;
    asm @nogc pure
    {
        mov int ptr v+4,7;
    }
    return v;
}

/*
 * The store made by the inline asm in rvo() must be visible in the
 * returned struct.
 */
void test6166()
{
    auto returned = rvo();
    assert(returned.a[1] == 7);
}
/****************************************************/

/*
 * Test driver for the D_InlineAsm_X86_64 build: prints a banner, runs every
 * enabled test in order, prints "Success" and returns 0.  Tests report
 * failure through their own asserts.  Disabled tests are kept as comments
 * together with the original note on why they are off.
 */
int main()
{
    printf("Testing iasm64.d\n");

    test1();  test2();  test3();  test4();  test5();  test6();
    // test7();   TODO 16bit seg
    test8();
    // test9();   Fails
    // test10();  Fails
    test11(); test12(); test13(); test14(); test15();
    // test16();  add this one from \cbx\test\iasm.c ?
    test17(); test18(); test19();
    // test20();  8087
    test21(); test22(); test23(); test24();
    test25(); test26(); test27(); test28();
    // test29();  offsetof?
    test30(); test31(); test32(); test33(); test34();
    // test35();  RIP addressing?
    // test36();  RIP addressing?
    test37(); test38(); test39(); test40(); test41();
    test42(); test43(); test44(); test45(); test46();
    // test47();  RIP addressing?
    test48(); test49(); test50();
    // Test51
    test52(); test53(); test54(); test55(); test56();
    test57(); test58(); test59(); test60(); test61();
    test62();

    // Tests named by number or topic rather than by sequence.
    test2941();
    test9866();
    testxadd();
    test9965();
    test12849();
    test12968();
    test15999();
    testconst();
    test17027();
    test18553();
    test20126();
    test63();
    test6166();

    printf("Success\n");
    return 0;
}

}
else
{
    // No supported inline assembler for this target: nothing to test.
    int main() { return 0; }
}