From 53bb222fb3e938e9a44468fcfa21823cbd8d71a0 Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Fri, 8 May 2026 16:10:58 -0700 Subject: [PATCH 01/22] Add Wasm packed SIMD instruction encodings to JIT emitter Add instruction definitions and emitter infrastructure for the WebAssembly packed SIMD (128-bit) instruction set: - Add ~200 INST2 entries in instrswasm.h for all SIMD opcodes (0xFD prefix) - Add 4 new instruction formats: IF_V128_CONST, IF_LANE, IF_MEMARG_LANE, IF_SHUFFLE in emitfmtswasm.h - Add instrDesc subclasses in emit.h for SIMD-specific payloads: instrDescV128Const, instrDescShuffle, instrDescLane, instrDescMemargLane - Increase TARGET_WASM instruction bit-field to 9 bits (512 max) in emit.h - Add emit functions (emitIns_V128Const, emitIns_Lane, emitIns_MemargLane, emitIns_Shuffle) with size calculation and output encoding in emitwasm.cpp - Declare new emit functions in emitwasm.h Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/emit.h | 73 +++++++++ src/coreclr/jit/emitfmtswasm.h | 4 + src/coreclr/jit/emitwasm.cpp | 148 +++++++++++++++++ src/coreclr/jit/emitwasm.h | 6 + src/coreclr/jit/instrswasm.h | 282 +++++++++++++++++++++++++++++++++ 5 files changed, 513 insertions(+) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index d871a91cd8a516..f6a738a009b226 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -694,6 +694,9 @@ class emitter // TODO-LoongArch64: not include SIMD-vector. 
static_assert(INS_count <= 512); instruction _idIns : 9; +#elif defined(TARGET_WASM) + static_assert(INS_count <= 512); + instruction _idIns : 9; #else static_assert(INS_count <= 256); instruction _idIns : 8; @@ -2418,6 +2421,76 @@ class emitter imm = i; } }; + + struct instrDescV128Const : instrDesc + { + instrDescV128Const() = delete; + + uint8_t v128Bytes[16]; + + void idV128Const(const uint8_t* bytes) + { + assert(bytes != nullptr); + memcpy(v128Bytes, bytes, 16); + } + + const uint8_t* idV128Const() const + { + return v128Bytes; + } + }; + + struct instrDescShuffle : instrDesc + { + instrDescShuffle() = delete; + + uint8_t shuffleLanes[16]; + + void idShuffleLanes(const uint8_t* lanes) + { + assert(lanes != nullptr); + memcpy(shuffleLanes, lanes, 16); + } + + const uint8_t* idShuffleLanes() const + { + return shuffleLanes; + } + }; + + struct instrDescLane : instrDesc + { + instrDescLane() = delete; + + uint8_t lane; + + void idLaneIdx(uint8_t idx) + { + lane = idx; + } + + uint8_t idLaneIdx() const + { + return lane; + } + }; + + struct instrDescMemargLane : instrDescCns + { + instrDescMemargLane() = delete; + + uint8_t lane; + + void idLaneIdx(uint8_t idx) + { + lane = idx; + } + + uint8_t idLaneIdx() const + { + return lane; + } + }; #endif // TARGET_WASM #ifdef TARGET_RISCV64 diff --git a/src/coreclr/jit/emitfmtswasm.h b/src/coreclr/jit/emitfmtswasm.h index 4cb69ee24e57c5..aa34ed6c06ef7b 100644 --- a/src/coreclr/jit/emitfmtswasm.h +++ b/src/coreclr/jit/emitfmtswasm.h @@ -44,6 +44,10 @@ IF_DEF(CALL_INDIRECT, IS_NONE, NONE) // IF_DEF(TRY_TABLE, IS_NONE, NONE) // IF_DEF(CATCH_DECL, IS_NONE, NONE) // +IF_DEF(V128_CONST, IS_NONE, NONE) // <16 raw bytes> +IF_DEF(LANE, IS_NONE, NONE) // +IF_DEF(MEMARG_LANE, IS_NONE, NONE) // +IF_DEF(SHUFFLE, IS_NONE, NONE) // <16 lane-index bytes> #undef IF_DEF #endif // !DEFINE_ID_OPS diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index 4eaaf589cc9b20..768ad1a7bc691f 100644 --- 
a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -366,6 +366,89 @@ unsigned int emitter::emitGetValTypeImmImm(const instrDesc* id) return static_cast(id)->imm; } +//------------------------------------------------------------------------ +// Packed SIMD instruction emit functions +//------------------------------------------------------------------------ + +//------------------------------------------------------------------------ +// emitIns_V128Const: Emit a v128.const instruction with 16 raw bytes. +// +// Arguments: +// ins - instruction (INS_v128_const) +// bytes - pointer to 16 bytes of constant data +// +void emitter::emitIns_V128Const(instruction ins, const uint8_t* bytes) +{ + assert(bytes != nullptr); + instrDescV128Const* id = static_cast(emitAllocAnyInstr(sizeof(instrDescV128Const), EA_16BYTE)); + id->idIns(ins); + id->idInsFmt(IF_V128_CONST); + id->idV128Const(bytes); + + dispIns(id); + appendToCurIG(id); +} + +//------------------------------------------------------------------------ +// emitIns_Lane: Emit a SIMD extract/replace lane instruction. +// +// Arguments: +// ins - instruction (e.g., INS_i8x16_extract_lane_s) +// attr - emit attribute indicating the lane element size +// laneIdx - lane index byte +// +void emitter::emitIns_Lane(instruction ins, emitAttr attr, uint8_t laneIdx) +{ + instrDescLane* id = static_cast(emitAllocAnyInstr(sizeof(instrDescLane), attr)); + id->idIns(ins); + id->idInsFmt(IF_LANE); + id->idLaneIdx(laneIdx); + + dispIns(id); + appendToCurIG(id); +} + +//------------------------------------------------------------------------ +// emitIns_MemargLane: Emit a SIMD load/store lane instruction with memarg + lane index. 
+// +// Arguments: +// ins - instruction (e.g., INS_v128_load8_lane) +// attr - emit attribute indicating the memory access size +// offset - memory offset for the memarg +// laneIdx - lane index byte +// +void emitter::emitIns_MemargLane(instruction ins, emitAttr attr, cnsval_ssize_t offset, uint8_t laneIdx) +{ + instrDescMemargLane* id = + static_cast(emitAllocAnyInstr(sizeof(instrDescMemargLane), attr)); + id->idIns(ins); + id->idInsFmt(IF_MEMARG_LANE); + id->idcCnsVal = offset; + id->idLaneIdx(laneIdx); + + dispIns(id); + appendToCurIG(id); +} + +//------------------------------------------------------------------------ +// emitIns_Shuffle: Emit an i8x16.shuffle instruction with 16 lane-index bytes. +// +// Arguments: +// ins - instruction (INS_i8x16_shuffle) +// laneIndices - pointer to 16 lane index bytes +// +void emitter::emitIns_Shuffle(instruction ins, const uint8_t* laneIndices) +{ + assert(laneIndices != nullptr); + instrDescShuffle* id = static_cast(emitAllocAnyInstr(sizeof(instrDescShuffle), EA_16BYTE)); + id->idIns(ins); + id->idInsFmt(IF_SHUFFLE); + id->idShuffleLanes(laneIndices); + + dispIns(id); + appendToCurIG(id); +} + emitter::insFormat emitter::emitInsFormat(instruction ins) { static_assert(IF_COUNT < 255); @@ -434,6 +517,20 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const return sizeof(instrDescValTypeImm); } + switch (id->idInsFmt()) + { + case IF_V128_CONST: + return sizeof(instrDescV128Const); + case IF_SHUFFLE: + return sizeof(instrDescShuffle); + case IF_LANE: + return sizeof(instrDescLane); + case IF_MEMARG_LANE: + return sizeof(instrDescMemargLane); + default: + break; + } + return sizeof(instrDesc); } @@ -564,6 +661,23 @@ unsigned emitter::instrDesc::idCodeSize() const size += SizeOfULEB128(emitGetInsSC(this)); // control flow stack offset break; } + case IF_V128_CONST: + size += 16; // 16 raw bytes for the v128 constant + break; + case IF_LANE: + size += 1; // 1 byte lane index + break; + case IF_MEMARG_LANE: + { + uint64_t 
align = emitGetAlignHintLog2(this); + size += SizeOfULEB128(align); + size += idIsCnsReloc() ? PADDED_RELOC_SIZE : SizeOfULEB128(emitGetInsSC(this)); + size += 1; // 1 byte lane index + break; + } + case IF_SHUFFLE: + size += 16; // 16 lane-index bytes + break; default: unreached(); } @@ -867,6 +981,40 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutputULEB128Padded(dst, (int64_t)size); break; } + case IF_V128_CONST: + { + dst += emitOutputOpcode(dst, ins); + const instrDescV128Const* idConst = static_cast(id); + dst += emitRawBytes(dst, idConst->idV128Const(), 16); + break; + } + case IF_LANE: + { + dst += emitOutputOpcode(dst, ins); + const instrDescLane* idLane = static_cast(id); + dst += emitOutputByte(dst, idLane->idLaneIdx()); + break; + } + case IF_MEMARG_LANE: + { + dst += emitOutputOpcode(dst, ins); + const instrDescMemargLane* idMemLane = static_cast(id); + uint64_t align = emitGetAlignHintLog2(id); + uint64_t offset = emitGetInsSC(id); + assert(align <= UINT32_MAX); + assert(align < 64); + dst += emitOutputULEB128(dst, align); + dst += emitOutputULEB128(dst, offset); + dst += emitOutputByte(dst, idMemLane->idLaneIdx()); + break; + } + case IF_SHUFFLE: + { + dst += emitOutputOpcode(dst, ins); + const instrDescShuffle* idShuf = static_cast(id); + dst += emitRawBytes(dst, idShuf->idShuffleLanes(), 16); + break; + } default: NYI_WASM("emitOutputInstr"); break; diff --git a/src/coreclr/jit/emitwasm.h b/src/coreclr/jit/emitwasm.h index d97555fd3760a5..411ca69c64e11a 100644 --- a/src/coreclr/jit/emitwasm.h +++ b/src/coreclr/jit/emitwasm.h @@ -31,6 +31,12 @@ void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2) void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); +// Packed SIMD instruction emit functions +void emitIns_V128Const(instruction ins, const uint8_t* bytes); +void emitIns_Lane(instruction ins, emitAttr attr, uint8_t laneIdx); +void 
emitIns_MemargLane(instruction ins, emitAttr attr, cnsval_ssize_t offset, uint8_t laneIdx); +void emitIns_Shuffle(instruction ins, const uint8_t* laneIndices); + void emitAddressConstant(void* address); static unsigned SizeOfULEB128(uint64_t value); diff --git a/src/coreclr/jit/instrswasm.h b/src/coreclr/jit/instrswasm.h index d499e3e4b9ff15..6632c4e488742f 100644 --- a/src/coreclr/jit/instrswasm.h +++ b/src/coreclr/jit/instrswasm.h @@ -241,6 +241,288 @@ INST2(i64_trunc_sat_f64_u, "i64.trunc_sat_f64_u", 0, IF_OPCODE, 0xFC, 7) INST2(memory_copy, "memory.copy", 0, IF_MEMIDX_MEMIDX, 0xFC, 10) INST2(memory_fill, "memory.fill", 0, IF_ULEB128, 0xFC, 11) +// 5.4.9 Vector Instructions (SIMD, 0xFD prefix) +// +// Memory operations (memarg format) +INST2(v128_load, "v128.load", 0, IF_MEMARG, 0xFD, 0) +INST2(v128_load8x8_s, "v128.load8x8_s", 0, IF_MEMARG, 0xFD, 1) +INST2(v128_load8x8_u, "v128.load8x8_u", 0, IF_MEMARG, 0xFD, 2) +INST2(v128_load16x4_s, "v128.load16x4_s", 0, IF_MEMARG, 0xFD, 3) +INST2(v128_load16x4_u, "v128.load16x4_u", 0, IF_MEMARG, 0xFD, 4) +INST2(v128_load32x2_s, "v128.load32x2_s", 0, IF_MEMARG, 0xFD, 5) +INST2(v128_load32x2_u, "v128.load32x2_u", 0, IF_MEMARG, 0xFD, 6) +INST2(v128_load8_splat, "v128.load8_splat", 0, IF_MEMARG, 0xFD, 7) +INST2(v128_load16_splat, "v128.load16_splat", 0, IF_MEMARG, 0xFD, 8) +INST2(v128_load32_splat, "v128.load32_splat", 0, IF_MEMARG, 0xFD, 9) +INST2(v128_load64_splat, "v128.load64_splat", 0, IF_MEMARG, 0xFD, 10) +INST2(v128_store, "v128.store", 0, IF_MEMARG, 0xFD, 11) + +// v128.const and i8x16.shuffle (special formats) +INST2(v128_const, "v128.const", 0, IF_V128_CONST, 0xFD, 12) +INST2(i8x16_shuffle, "i8x16.shuffle", 0, IF_SHUFFLE, 0xFD, 13) + +// i8x16.swizzle (plain opcode) +INST2(i8x16_swizzle, "i8x16.swizzle", 0, IF_OPCODE, 0xFD, 14) + +// Splat operations +INST2(i8x16_splat, "i8x16.splat", 0, IF_OPCODE, 0xFD, 15) +INST2(i16x8_splat, "i16x8.splat", 0, IF_OPCODE, 0xFD, 16) +INST2(i32x4_splat, "i32x4.splat", 0, IF_OPCODE, 
0xFD, 17) +INST2(i64x2_splat, "i64x2.splat", 0, IF_OPCODE, 0xFD, 18) +INST2(f32x4_splat, "f32x4.splat", 0, IF_OPCODE, 0xFD, 19) +INST2(f64x2_splat, "f64x2.splat", 0, IF_OPCODE, 0xFD, 20) + +// Extract/replace lane operations (lane index byte) +INST2(i8x16_extract_lane_s, "i8x16.extract_lane_s", 0, IF_LANE, 0xFD, 21) +INST2(i8x16_extract_lane_u, "i8x16.extract_lane_u", 0, IF_LANE, 0xFD, 22) +INST2(i8x16_replace_lane, "i8x16.replace_lane", 0, IF_LANE, 0xFD, 23) +INST2(i16x8_extract_lane_s, "i16x8.extract_lane_s", 0, IF_LANE, 0xFD, 24) +INST2(i16x8_extract_lane_u, "i16x8.extract_lane_u", 0, IF_LANE, 0xFD, 25) +INST2(i16x8_replace_lane, "i16x8.replace_lane", 0, IF_LANE, 0xFD, 26) +INST2(i32x4_extract_lane, "i32x4.extract_lane", 0, IF_LANE, 0xFD, 27) +INST2(i32x4_replace_lane, "i32x4.replace_lane", 0, IF_LANE, 0xFD, 28) +INST2(i64x2_extract_lane, "i64x2.extract_lane", 0, IF_LANE, 0xFD, 29) +INST2(i64x2_replace_lane, "i64x2.replace_lane", 0, IF_LANE, 0xFD, 30) +INST2(f32x4_extract_lane, "f32x4.extract_lane", 0, IF_LANE, 0xFD, 31) +INST2(f32x4_replace_lane, "f32x4.replace_lane", 0, IF_LANE, 0xFD, 32) +INST2(f64x2_extract_lane, "f64x2.extract_lane", 0, IF_LANE, 0xFD, 33) +INST2(f64x2_replace_lane, "f64x2.replace_lane", 0, IF_LANE, 0xFD, 34) + +// i8x16 comparisons +INST2(i8x16_eq, "i8x16.eq", 0, IF_OPCODE, 0xFD, 35) +INST2(i8x16_ne, "i8x16.ne", 0, IF_OPCODE, 0xFD, 36) +INST2(i8x16_lt_s, "i8x16.lt_s", 0, IF_OPCODE, 0xFD, 37) +INST2(i8x16_lt_u, "i8x16.lt_u", 0, IF_OPCODE, 0xFD, 38) +INST2(i8x16_gt_s, "i8x16.gt_s", 0, IF_OPCODE, 0xFD, 39) +INST2(i8x16_gt_u, "i8x16.gt_u", 0, IF_OPCODE, 0xFD, 40) +INST2(i8x16_le_s, "i8x16.le_s", 0, IF_OPCODE, 0xFD, 41) +INST2(i8x16_le_u, "i8x16.le_u", 0, IF_OPCODE, 0xFD, 42) +INST2(i8x16_ge_s, "i8x16.ge_s", 0, IF_OPCODE, 0xFD, 43) +INST2(i8x16_ge_u, "i8x16.ge_u", 0, IF_OPCODE, 0xFD, 44) + +// i16x8 comparisons +INST2(i16x8_eq, "i16x8.eq", 0, IF_OPCODE, 0xFD, 45) +INST2(i16x8_ne, "i16x8.ne", 0, IF_OPCODE, 0xFD, 46) +INST2(i16x8_lt_s, 
"i16x8.lt_s", 0, IF_OPCODE, 0xFD, 47) +INST2(i16x8_lt_u, "i16x8.lt_u", 0, IF_OPCODE, 0xFD, 48) +INST2(i16x8_gt_s, "i16x8.gt_s", 0, IF_OPCODE, 0xFD, 49) +INST2(i16x8_gt_u, "i16x8.gt_u", 0, IF_OPCODE, 0xFD, 50) +INST2(i16x8_le_s, "i16x8.le_s", 0, IF_OPCODE, 0xFD, 51) +INST2(i16x8_le_u, "i16x8.le_u", 0, IF_OPCODE, 0xFD, 52) +INST2(i16x8_ge_s, "i16x8.ge_s", 0, IF_OPCODE, 0xFD, 53) +INST2(i16x8_ge_u, "i16x8.ge_u", 0, IF_OPCODE, 0xFD, 54) + +// i32x4 comparisons +INST2(i32x4_eq, "i32x4.eq", 0, IF_OPCODE, 0xFD, 55) +INST2(i32x4_ne, "i32x4.ne", 0, IF_OPCODE, 0xFD, 56) +INST2(i32x4_lt_s, "i32x4.lt_s", 0, IF_OPCODE, 0xFD, 57) +INST2(i32x4_lt_u, "i32x4.lt_u", 0, IF_OPCODE, 0xFD, 58) +INST2(i32x4_gt_s, "i32x4.gt_s", 0, IF_OPCODE, 0xFD, 59) +INST2(i32x4_gt_u, "i32x4.gt_u", 0, IF_OPCODE, 0xFD, 60) +INST2(i32x4_le_s, "i32x4.le_s", 0, IF_OPCODE, 0xFD, 61) +INST2(i32x4_le_u, "i32x4.le_u", 0, IF_OPCODE, 0xFD, 62) +INST2(i32x4_ge_s, "i32x4.ge_s", 0, IF_OPCODE, 0xFD, 63) +INST2(i32x4_ge_u, "i32x4.ge_u", 0, IF_OPCODE, 0xFD, 64) + +// i64x2 comparisons +INST2(i64x2_eq, "i64x2.eq", 0, IF_OPCODE, 0xFD, 214) +INST2(i64x2_ne, "i64x2.ne", 0, IF_OPCODE, 0xFD, 215) +INST2(i64x2_lt_s, "i64x2.lt_s", 0, IF_OPCODE, 0xFD, 216) +INST2(i64x2_gt_s, "i64x2.gt_s", 0, IF_OPCODE, 0xFD, 217) +INST2(i64x2_le_s, "i64x2.le_s", 0, IF_OPCODE, 0xFD, 218) +INST2(i64x2_ge_s, "i64x2.ge_s", 0, IF_OPCODE, 0xFD, 219) + +// f32x4 comparisons +INST2(f32x4_eq, "f32x4.eq", 0, IF_OPCODE, 0xFD, 65) +INST2(f32x4_ne, "f32x4.ne", 0, IF_OPCODE, 0xFD, 66) +INST2(f32x4_lt, "f32x4.lt", 0, IF_OPCODE, 0xFD, 67) +INST2(f32x4_gt, "f32x4.gt", 0, IF_OPCODE, 0xFD, 68) +INST2(f32x4_le, "f32x4.le", 0, IF_OPCODE, 0xFD, 69) +INST2(f32x4_ge, "f32x4.ge", 0, IF_OPCODE, 0xFD, 70) + +// f64x2 comparisons +INST2(f64x2_eq, "f64x2.eq", 0, IF_OPCODE, 0xFD, 71) +INST2(f64x2_ne, "f64x2.ne", 0, IF_OPCODE, 0xFD, 72) +INST2(f64x2_lt, "f64x2.lt", 0, IF_OPCODE, 0xFD, 73) +INST2(f64x2_gt, "f64x2.gt", 0, IF_OPCODE, 0xFD, 74) +INST2(f64x2_le, "f64x2.le", 0, 
IF_OPCODE, 0xFD, 75) +INST2(f64x2_ge, "f64x2.ge", 0, IF_OPCODE, 0xFD, 76) + +// v128 bitwise operations +INST2(v128_not, "v128.not", 0, IF_OPCODE, 0xFD, 77) +INST2(v128_and, "v128.and", 0, IF_OPCODE, 0xFD, 78) +INST2(v128_andnot, "v128.andnot", 0, IF_OPCODE, 0xFD, 79) +INST2(v128_or, "v128.or", 0, IF_OPCODE, 0xFD, 80) +INST2(v128_xor, "v128.xor", 0, IF_OPCODE, 0xFD, 81) +INST2(v128_bitselect, "v128.bitselect", 0, IF_OPCODE, 0xFD, 82) +INST2(v128_any_true, "v128.any_true", 0, IF_OPCODE, 0xFD, 83) + +// Load/store lane operations (memarg + lane index) +INST2(v128_load8_lane, "v128.load8_lane", 0, IF_MEMARG_LANE, 0xFD, 84) +INST2(v128_load16_lane, "v128.load16_lane", 0, IF_MEMARG_LANE, 0xFD, 85) +INST2(v128_load32_lane, "v128.load32_lane", 0, IF_MEMARG_LANE, 0xFD, 86) +INST2(v128_load64_lane, "v128.load64_lane", 0, IF_MEMARG_LANE, 0xFD, 87) +INST2(v128_store8_lane, "v128.store8_lane", 0, IF_MEMARG_LANE, 0xFD, 88) +INST2(v128_store16_lane, "v128.store16_lane", 0, IF_MEMARG_LANE, 0xFD, 89) +INST2(v128_store32_lane, "v128.store32_lane", 0, IF_MEMARG_LANE, 0xFD, 90) +INST2(v128_store64_lane, "v128.store64_lane", 0, IF_MEMARG_LANE, 0xFD, 91) + +// Load zero operations (memarg) +INST2(v128_load32_zero, "v128.load32_zero", 0, IF_MEMARG, 0xFD, 92) +INST2(v128_load64_zero, "v128.load64_zero", 0, IF_MEMARG, 0xFD, 93) + +// Conversion: demote/promote (no sign specifier) +INST2(f32x4_demote_f64x2_zero, "f32x4.demote_f64x2_zero", 0, IF_OPCODE, 0xFD, 94) +INST2(f64x2_promote_low_f32x4, "f64x2.promote_low_f32x4", 0, IF_OPCODE, 0xFD, 95) + +// i8x16 arithmetic and other operations +INST2(i8x16_abs, "i8x16.abs", 0, IF_OPCODE, 0xFD, 96) +INST2(i8x16_neg, "i8x16.neg", 0, IF_OPCODE, 0xFD, 97) +INST2(i8x16_popcnt, "i8x16.popcnt", 0, IF_OPCODE, 0xFD, 98) +INST2(i8x16_all_true, "i8x16.all_true", 0, IF_OPCODE, 0xFD, 99) +INST2(i8x16_bitmask, "i8x16.bitmask", 0, IF_OPCODE, 0xFD, 100) +INST2(i8x16_narrow_i16x8_s, "i8x16.narrow_i16x8_s", 0, IF_OPCODE, 0xFD, 101) +INST2(i8x16_narrow_i16x8_u, 
"i8x16.narrow_i16x8_u", 0, IF_OPCODE, 0xFD, 102) +INST2(f32x4_ceil, "f32x4.ceil", 0, IF_OPCODE, 0xFD, 103) +INST2(f32x4_floor, "f32x4.floor", 0, IF_OPCODE, 0xFD, 104) +INST2(f32x4_trunc, "f32x4.trunc", 0, IF_OPCODE, 0xFD, 105) +INST2(f32x4_nearest, "f32x4.nearest", 0, IF_OPCODE, 0xFD, 106) +INST2(i8x16_shl, "i8x16.shl", 0, IF_OPCODE, 0xFD, 107) +INST2(i8x16_shr_s, "i8x16.shr_s", 0, IF_OPCODE, 0xFD, 108) +INST2(i8x16_shr_u, "i8x16.shr_u", 0, IF_OPCODE, 0xFD, 109) +INST2(i8x16_add, "i8x16.add", 0, IF_OPCODE, 0xFD, 110) +INST2(i8x16_add_sat_s, "i8x16.add_sat_s", 0, IF_OPCODE, 0xFD, 111) +INST2(i8x16_add_sat_u, "i8x16.add_sat_u", 0, IF_OPCODE, 0xFD, 112) +INST2(i8x16_sub, "i8x16.sub", 0, IF_OPCODE, 0xFD, 113) +INST2(i8x16_sub_sat_s, "i8x16.sub_sat_s", 0, IF_OPCODE, 0xFD, 114) +INST2(i8x16_sub_sat_u, "i8x16.sub_sat_u", 0, IF_OPCODE, 0xFD, 115) +INST2(f64x2_ceil, "f64x2.ceil", 0, IF_OPCODE, 0xFD, 116) +INST2(f64x2_floor, "f64x2.floor", 0, IF_OPCODE, 0xFD, 117) +INST2(i8x16_min_s, "i8x16.min_s", 0, IF_OPCODE, 0xFD, 118) +INST2(i8x16_min_u, "i8x16.min_u", 0, IF_OPCODE, 0xFD, 119) +INST2(i8x16_max_s, "i8x16.max_s", 0, IF_OPCODE, 0xFD, 120) +INST2(i8x16_max_u, "i8x16.max_u", 0, IF_OPCODE, 0xFD, 121) +INST2(f64x2_trunc, "f64x2.trunc", 0, IF_OPCODE, 0xFD, 122) +INST2(i8x16_avgr_u, "i8x16.avgr_u", 0, IF_OPCODE, 0xFD, 123) + +// i16x8 operations +INST2(i16x8_extadd_pairwise_s_i8x16, "i16x8.extadd_pairwise_s_i8x16", 0, IF_OPCODE, 0xFD, 124) +INST2(i16x8_extadd_pairwise_u_i8x16, "i16x8.extadd_pairwise_u_i8x16", 0, IF_OPCODE, 0xFD, 125) +INST2(i32x4_extadd_pairwise_s_i16x8, "i32x4.extadd_pairwise_s_i16x8", 0, IF_OPCODE, 0xFD, 126) +INST2(i32x4_extadd_pairwise_u_i16x8, "i32x4.extadd_pairwise_u_i16x8", 0, IF_OPCODE, 0xFD, 127) +INST2(i16x8_abs, "i16x8.abs", 0, IF_OPCODE, 0xFD, 128) +INST2(i16x8_neg, "i16x8.neg", 0, IF_OPCODE, 0xFD, 129) +INST2(i16x8_q15mulr_sat_s, "i16x8.q15mulr_sat_s", 0, IF_OPCODE, 0xFD, 130) +INST2(i16x8_all_true, "i16x8.all_true", 0, IF_OPCODE, 0xFD, 131) 
+INST2(i16x8_bitmask, "i16x8.bitmask", 0, IF_OPCODE, 0xFD, 132) +INST2(i16x8_narrow_i32x4_s, "i16x8.narrow_i32x4_s", 0, IF_OPCODE, 0xFD, 133) +INST2(i16x8_narrow_i32x4_u, "i16x8.narrow_i32x4_u", 0, IF_OPCODE, 0xFD, 134) +INST2(i16x8_extend_low_s_i8x16, "i16x8.extend_low_s_i8x16", 0, IF_OPCODE, 0xFD, 135) +INST2(i16x8_extend_high_s_i8x16, "i16x8.extend_high_s_i8x16", 0, IF_OPCODE, 0xFD, 136) +INST2(i16x8_extend_low_u_i8x16, "i16x8.extend_low_u_i8x16", 0, IF_OPCODE, 0xFD, 137) +INST2(i16x8_extend_high_u_i8x16, "i16x8.extend_high_u_i8x16", 0, IF_OPCODE, 0xFD, 138) +INST2(i16x8_shl, "i16x8.shl", 0, IF_OPCODE, 0xFD, 139) +INST2(i16x8_shr_s, "i16x8.shr_s", 0, IF_OPCODE, 0xFD, 140) +INST2(i16x8_shr_u, "i16x8.shr_u", 0, IF_OPCODE, 0xFD, 141) +INST2(i16x8_add, "i16x8.add", 0, IF_OPCODE, 0xFD, 142) +INST2(i16x8_add_sat_s, "i16x8.add_sat_s", 0, IF_OPCODE, 0xFD, 143) +INST2(i16x8_add_sat_u, "i16x8.add_sat_u", 0, IF_OPCODE, 0xFD, 144) +INST2(i16x8_sub, "i16x8.sub", 0, IF_OPCODE, 0xFD, 145) +INST2(i16x8_sub_sat_s, "i16x8.sub_sat_s", 0, IF_OPCODE, 0xFD, 146) +INST2(i16x8_sub_sat_u, "i16x8.sub_sat_u", 0, IF_OPCODE, 0xFD, 147) +INST2(f64x2_nearest, "f64x2.nearest", 0, IF_OPCODE, 0xFD, 148) +INST2(i16x8_mul, "i16x8.mul", 0, IF_OPCODE, 0xFD, 149) +INST2(i16x8_min_s, "i16x8.min_s", 0, IF_OPCODE, 0xFD, 150) +INST2(i16x8_min_u, "i16x8.min_u", 0, IF_OPCODE, 0xFD, 151) +INST2(i16x8_max_s, "i16x8.max_s", 0, IF_OPCODE, 0xFD, 152) +INST2(i16x8_max_u, "i16x8.max_u", 0, IF_OPCODE, 0xFD, 153) +INST2(i16x8_avgr_u, "i16x8.avgr_u", 0, IF_OPCODE, 0xFD, 155) +INST2(i16x8_extmul_low_s_i8x16, "i16x8.extmul_low_s_i8x16", 0, IF_OPCODE, 0xFD, 156) +INST2(i16x8_extmul_high_s_i8x16, "i16x8.extmul_high_s_i8x16", 0, IF_OPCODE, 0xFD, 157) +INST2(i16x8_extmul_low_u_i8x16, "i16x8.extmul_low_u_i8x16", 0, IF_OPCODE, 0xFD, 158) +INST2(i16x8_extmul_high_u_i8x16, "i16x8.extmul_high_u_i8x16", 0, IF_OPCODE, 0xFD, 159) + +// i32x4 operations +INST2(i32x4_abs, "i32x4.abs", 0, IF_OPCODE, 0xFD, 160) +INST2(i32x4_neg, 
"i32x4.neg", 0, IF_OPCODE, 0xFD, 161) +INST2(i32x4_all_true, "i32x4.all_true", 0, IF_OPCODE, 0xFD, 163) +INST2(i32x4_bitmask, "i32x4.bitmask", 0, IF_OPCODE, 0xFD, 164) +INST2(i32x4_extend_low_s_i16x8, "i32x4.extend_low_s_i16x8", 0, IF_OPCODE, 0xFD, 167) +INST2(i32x4_extend_high_s_i16x8, "i32x4.extend_high_s_i16x8", 0, IF_OPCODE, 0xFD, 168) +INST2(i32x4_extend_low_u_i16x8, "i32x4.extend_low_u_i16x8", 0, IF_OPCODE, 0xFD, 169) +INST2(i32x4_extend_high_u_i16x8, "i32x4.extend_high_u_i16x8", 0, IF_OPCODE, 0xFD, 170) +INST2(i32x4_shl, "i32x4.shl", 0, IF_OPCODE, 0xFD, 171) +INST2(i32x4_shr_s, "i32x4.shr_s", 0, IF_OPCODE, 0xFD, 172) +INST2(i32x4_shr_u, "i32x4.shr_u", 0, IF_OPCODE, 0xFD, 173) +INST2(i32x4_add, "i32x4.add", 0, IF_OPCODE, 0xFD, 174) +INST2(i32x4_sub, "i32x4.sub", 0, IF_OPCODE, 0xFD, 177) +INST2(i32x4_mul, "i32x4.mul", 0, IF_OPCODE, 0xFD, 181) +INST2(i32x4_min_s, "i32x4.min_s", 0, IF_OPCODE, 0xFD, 182) +INST2(i32x4_min_u, "i32x4.min_u", 0, IF_OPCODE, 0xFD, 183) +INST2(i32x4_max_s, "i32x4.max_s", 0, IF_OPCODE, 0xFD, 184) +INST2(i32x4_max_u, "i32x4.max_u", 0, IF_OPCODE, 0xFD, 185) +INST2(i32x4_dot_i16x8_s, "i32x4.dot_i16x8_s", 0, IF_OPCODE, 0xFD, 186) +INST2(i32x4_extmul_low_s_i16x8, "i32x4.extmul_low_s_i16x8", 0, IF_OPCODE, 0xFD, 188) +INST2(i32x4_extmul_high_s_i16x8, "i32x4.extmul_high_s_i16x8", 0, IF_OPCODE, 0xFD, 189) +INST2(i32x4_extmul_low_u_i16x8, "i32x4.extmul_low_u_i16x8", 0, IF_OPCODE, 0xFD, 190) +INST2(i32x4_extmul_high_u_i16x8, "i32x4.extmul_high_u_i16x8", 0, IF_OPCODE, 0xFD, 191) + +// i64x2 operations +INST2(i64x2_abs, "i64x2.abs", 0, IF_OPCODE, 0xFD, 192) +INST2(i64x2_neg, "i64x2.neg", 0, IF_OPCODE, 0xFD, 193) +INST2(i64x2_all_true, "i64x2.all_true", 0, IF_OPCODE, 0xFD, 195) +INST2(i64x2_bitmask, "i64x2.bitmask", 0, IF_OPCODE, 0xFD, 196) +INST2(i64x2_extend_low_s_i32x4, "i64x2.extend_low_s_i32x4", 0, IF_OPCODE, 0xFD, 199) +INST2(i64x2_extend_high_s_i32x4, "i64x2.extend_high_s_i32x4", 0, IF_OPCODE, 0xFD, 200) +INST2(i64x2_extend_low_u_i32x4, 
"i64x2.extend_low_u_i32x4", 0, IF_OPCODE, 0xFD, 201) +INST2(i64x2_extend_high_u_i32x4, "i64x2.extend_high_u_i32x4", 0, IF_OPCODE, 0xFD, 202) +INST2(i64x2_shl, "i64x2.shl", 0, IF_OPCODE, 0xFD, 203) +INST2(i64x2_shr_s, "i64x2.shr_s", 0, IF_OPCODE, 0xFD, 204) +INST2(i64x2_shr_u, "i64x2.shr_u", 0, IF_OPCODE, 0xFD, 205) +INST2(i64x2_add, "i64x2.add", 0, IF_OPCODE, 0xFD, 206) +INST2(i64x2_sub, "i64x2.sub", 0, IF_OPCODE, 0xFD, 209) +INST2(i64x2_mul, "i64x2.mul", 0, IF_OPCODE, 0xFD, 213) +INST2(i64x2_extmul_low_s_i32x4, "i64x2.extmul_low_s_i32x4", 0, IF_OPCODE, 0xFD, 220) +INST2(i64x2_extmul_high_s_i32x4, "i64x2.extmul_high_s_i32x4", 0, IF_OPCODE, 0xFD, 221) +INST2(i64x2_extmul_low_u_i32x4, "i64x2.extmul_low_u_i32x4", 0, IF_OPCODE, 0xFD, 222) +INST2(i64x2_extmul_high_u_i32x4, "i64x2.extmul_high_u_i32x4", 0, IF_OPCODE, 0xFD, 223) + +// f32x4 arithmetic +INST2(f32x4_abs, "f32x4.abs", 0, IF_OPCODE, 0xFD, 224) +INST2(f32x4_neg, "f32x4.neg", 0, IF_OPCODE, 0xFD, 225) +INST2(f32x4_sqrt, "f32x4.sqrt", 0, IF_OPCODE, 0xFD, 227) +INST2(f32x4_add, "f32x4.add", 0, IF_OPCODE, 0xFD, 228) +INST2(f32x4_sub, "f32x4.sub", 0, IF_OPCODE, 0xFD, 229) +INST2(f32x4_mul, "f32x4.mul", 0, IF_OPCODE, 0xFD, 230) +INST2(f32x4_div, "f32x4.div", 0, IF_OPCODE, 0xFD, 231) +INST2(f32x4_min, "f32x4.min", 0, IF_OPCODE, 0xFD, 232) +INST2(f32x4_max, "f32x4.max", 0, IF_OPCODE, 0xFD, 233) +INST2(f32x4_pmin, "f32x4.pmin", 0, IF_OPCODE, 0xFD, 234) +INST2(f32x4_pmax, "f32x4.pmax", 0, IF_OPCODE, 0xFD, 235) + +// f64x2 arithmetic +INST2(f64x2_abs, "f64x2.abs", 0, IF_OPCODE, 0xFD, 236) +INST2(f64x2_neg, "f64x2.neg", 0, IF_OPCODE, 0xFD, 237) +INST2(f64x2_sqrt, "f64x2.sqrt", 0, IF_OPCODE, 0xFD, 239) +INST2(f64x2_add, "f64x2.add", 0, IF_OPCODE, 0xFD, 240) +INST2(f64x2_sub, "f64x2.sub", 0, IF_OPCODE, 0xFD, 241) +INST2(f64x2_mul, "f64x2.mul", 0, IF_OPCODE, 0xFD, 242) +INST2(f64x2_div, "f64x2.div", 0, IF_OPCODE, 0xFD, 243) +INST2(f64x2_min, "f64x2.min", 0, IF_OPCODE, 0xFD, 244) +INST2(f64x2_max, "f64x2.max", 0, IF_OPCODE, 
0xFD, 245) +INST2(f64x2_pmin, "f64x2.pmin", 0, IF_OPCODE, 0xFD, 246) +INST2(f64x2_pmax, "f64x2.pmax", 0, IF_OPCODE, 0xFD, 247) + +// Conversion operations (sign specifier before source type per spec) +INST2(i32x4_trunc_sat_s_f32x4, "i32x4.trunc_sat_s_f32x4", 0, IF_OPCODE, 0xFD, 248) +INST2(i32x4_trunc_sat_u_f32x4, "i32x4.trunc_sat_u_f32x4", 0, IF_OPCODE, 0xFD, 249) +INST2(f32x4_convert_s_i32x4, "f32x4.convert_s_i32x4", 0, IF_OPCODE, 0xFD, 250) +INST2(f32x4_convert_u_i32x4, "f32x4.convert_u_i32x4", 0, IF_OPCODE, 0xFD, 251) +INST2(i32x4_trunc_sat_s_f64x2_zero, "i32x4.trunc_sat_s_f64x2_zero", 0, IF_OPCODE, 0xFD, 252) +INST2(i32x4_trunc_sat_u_f64x2_zero, "i32x4.trunc_sat_u_f64x2_zero", 0, IF_OPCODE, 0xFD, 253) +INST2(f64x2_convert_low_s_i32x4, "f64x2.convert_low_s_i32x4", 0, IF_OPCODE, 0xFD, 254) +INST2(f64x2_convert_low_u_i32x4, "f64x2.convert_low_u_i32x4", 0, IF_OPCODE, 0xFD, 255) + // clang-format on #undef INST From 21f74bc760a4938ef4b3f03b42251f2278367361 Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Mon, 11 May 2026 13:09:07 -0700 Subject: [PATCH 02/22] Add debug emitter unit tests for Wasm packed SIMD encodings Add genWasmEmitterUnitTestsSimd() to exercise the new SIMD instruction emit functions (emitIns_V128Const, emitIns_Lane, emitIns_MemargLane, emitIns_Shuffle) and representative plain-opcode SIMD instructions. Activated via DOTNET_JitEmitUnitTests=* and DOTNET_JitEmitUnitTestsSections=simd (or all). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/codegen.h | 4 + src/coreclr/jit/codegenlinear.cpp | 7 ++ src/coreclr/jit/codegenwasm.cpp | 121 ++++++++++++++++++++++++++++++ 3 files changed, 132 insertions(+) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index bd15513f199d71..a4730c4bcd2f2a 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -639,6 +639,10 @@ class CodeGen final : public CodeGenInterface void genAmd64EmitterUnitTestsCCMP(); #endif +#if defined(TARGET_WASM) + void genWasmEmitterUnitTestsSimd(); +#endif + #endif // defined(DEBUG) #ifdef TARGET_ARM64 diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index b93ca108bb29b4..494b16fca91d69 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2703,6 +2703,7 @@ void CodeGen::genCodeForSetcc(GenTreeCC* setcc) * Possible values for JitEmitUnitTestsSections: * Amd64: all, sse2 * Arm64: all, general, advsimd, sve + * Wasm: all, simd */ #if defined(DEBUG) @@ -2769,6 +2770,12 @@ void CodeGen::genEmitterUnitTests() { genArm64EmitterUnitTestsPac(); } + +#elif defined(TARGET_WASM) + if (unitTestSectionAll || (strstr(unitTestSection, "simd") != nullptr)) + { + genWasmEmitterUnitTestsSimd(); + } #endif genDefineTempLabel(skipLabel); diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 0d89746cf97da6..cd10ffcd10718a 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -3361,6 +3361,127 @@ void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock) GetEmitter()->emitIns_J(instr, EA_4BYTE, depth, tgtBlock); } +#if defined(DEBUG) + +//------------------------------------------------------------------------ +// genWasmEmitterUnitTestsSimd: Exercise the packed SIMD instruction emit +// functions added for Wasm (v128.const, extract/replace lane, shuffle, +// load/store lane, and plain-opcode SIMD 
instructions). +// +// This is a temporary debug-only test that verifies the encoding paths +// do not assert or crash. The emitted instructions are not intended to +// form a valid Wasm program. +// +void CodeGen::genWasmEmitterUnitTestsSimd() +{ + emitter* emit = GetEmitter(); + + // --- IF_V128_CONST: v128.const with 16 raw bytes --- + const uint8_t v128Bytes[16] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}; + emit->emitIns_V128Const(INS_v128_const, v128Bytes); + + // All-zeros and all-ones constants + const uint8_t v128Zeros[16] = {0}; + emit->emitIns_V128Const(INS_v128_const, v128Zeros); + + const uint8_t v128Ones[16] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; + emit->emitIns_V128Const(INS_v128_const, v128Ones); + + // --- IF_LANE: extract/replace lane instructions --- + // i8x16 lanes (0..15) + emit->emitIns_Lane(INS_i8x16_extract_lane_s, EA_1BYTE, 0); + emit->emitIns_Lane(INS_i8x16_extract_lane_u, EA_1BYTE, 15); + emit->emitIns_Lane(INS_i8x16_replace_lane, EA_1BYTE, 7); + + // i16x8 lanes (0..7) + emit->emitIns_Lane(INS_i16x8_extract_lane_s, EA_2BYTE, 0); + emit->emitIns_Lane(INS_i16x8_extract_lane_u, EA_2BYTE, 7); + emit->emitIns_Lane(INS_i16x8_replace_lane, EA_2BYTE, 3); + + // i32x4 lanes (0..3) + emit->emitIns_Lane(INS_i32x4_extract_lane, EA_4BYTE, 0); + emit->emitIns_Lane(INS_i32x4_replace_lane, EA_4BYTE, 3); + + // i64x2 lanes (0..1) + emit->emitIns_Lane(INS_i64x2_extract_lane, EA_8BYTE, 0); + emit->emitIns_Lane(INS_i64x2_replace_lane, EA_8BYTE, 1); + + // f32x4 lanes (0..3) + emit->emitIns_Lane(INS_f32x4_extract_lane, EA_4BYTE, 2); + emit->emitIns_Lane(INS_f32x4_replace_lane, EA_4BYTE, 1); + + // f64x2 lanes (0..1) + emit->emitIns_Lane(INS_f64x2_extract_lane, EA_8BYTE, 0); + emit->emitIns_Lane(INS_f64x2_replace_lane, EA_8BYTE, 1); + + // --- IF_MEMARG_LANE: load/store lane with memarg --- + emit->emitIns_MemargLane(INS_v128_load8_lane, 
EA_1BYTE, 0, 5); + emit->emitIns_MemargLane(INS_v128_load16_lane, EA_2BYTE, 16, 3); + emit->emitIns_MemargLane(INS_v128_load32_lane, EA_4BYTE, 64, 2); + emit->emitIns_MemargLane(INS_v128_load64_lane, EA_8BYTE, 128, 1); + emit->emitIns_MemargLane(INS_v128_store8_lane, EA_1BYTE, 0, 0); + emit->emitIns_MemargLane(INS_v128_store16_lane, EA_2BYTE, 8, 7); + emit->emitIns_MemargLane(INS_v128_store32_lane, EA_4BYTE, 32, 1); + emit->emitIns_MemargLane(INS_v128_store64_lane, EA_8BYTE, 256, 0); + + // --- IF_SHUFFLE: i8x16.shuffle with 16 lane-index bytes --- + // Identity shuffle + const uint8_t identityShuffle[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + emit->emitIns_Shuffle(INS_i8x16_shuffle, identityShuffle); + + // Reverse bytes + const uint8_t reverseShuffle[16] = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; + emit->emitIns_Shuffle(INS_i8x16_shuffle, reverseShuffle); + + // Cross-operand shuffle (indices 16..31 refer to the second operand) + const uint8_t crossShuffle[16] = {0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31}; + emit->emitIns_Shuffle(INS_i8x16_shuffle, crossShuffle); + + // --- IF_OPCODE: plain opcode SIMD instructions (representative sample) --- + // Splat operations + emit->emitIns(INS_i8x16_splat); + emit->emitIns(INS_i16x8_splat); + emit->emitIns(INS_i32x4_splat); + emit->emitIns(INS_i64x2_splat); + emit->emitIns(INS_f32x4_splat); + emit->emitIns(INS_f64x2_splat); + + // Swizzle + emit->emitIns(INS_i8x16_swizzle); + + // A few comparisons + emit->emitIns(INS_i8x16_eq); + emit->emitIns(INS_i32x4_ne); + emit->emitIns(INS_f64x2_lt); + + // A few arithmetic ops + emit->emitIns(INS_i8x16_add); + emit->emitIns(INS_i32x4_mul); + emit->emitIns(INS_f32x4_sqrt); + emit->emitIns(INS_f64x2_neg); + + // Bitwise ops + emit->emitIns(INS_v128_not); + emit->emitIns(INS_v128_and); + emit->emitIns(INS_v128_or); + emit->emitIns(INS_v128_xor); + emit->emitIns(INS_v128_andnot); + + // Bitmask / any_true / all_true + 
emit->emitIns(INS_v128_any_true); + emit->emitIns(INS_i8x16_all_true); + emit->emitIns(INS_i32x4_bitmask); + + // Conversion operations + emit->emitIns(INS_f32x4_convert_s_i32x4); + emit->emitIns(INS_f64x2_convert_low_u_i32x4); + emit->emitIns(INS_i32x4_trunc_sat_s_f32x4); +} + +#endif // defined(DEBUG) + void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* code)) { IAllocator* allowZeroAlloc = new (m_compiler, CMK_GC) CompIAllocator(m_compiler->getAllocatorGC()); From 93d4d06bff1cf47ff17a19536ac7639e46bfc882 Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Tue, 12 May 2026 15:44:00 -0700 Subject: [PATCH 03/22] Add temp emitter unit test block to wasm control flow stack and set ordering number for appropriate branching --- src/coreclr/jit/codegenlinear.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 494b16fca91d69..bb3d4f9bfb2bf4 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -18,7 +18,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "codegen.h" #if defined(TARGET_WASM) -class WasmInterval; +#include "fgwasm.h" #endif //------------------------------------------------------------------------ @@ -2728,6 +2728,12 @@ void CodeGen::genEmitterUnitTests() // Jump over the generated tests as they are not intended to be run. BasicBlock* skipLabel = genCreateTempLabel(); +#ifdef TARGET_WASM + skipLabel->bbPreorderNum = m_compiler->compCurBB->bbPreorderNum + 1; + genDefineTempLabel(skipLabel); + WasmInterval* skipInterval = WasmInterval::NewBlock(m_compiler, skipLabel, skipLabel); + this->wasmControlFlowStack->Push(skipInterval); +#endif inst_JMP(EJ_jmp, skipLabel); // Add NOPs at the start and end for easier script parsing. 
@@ -2778,7 +2784,9 @@ void CodeGen::genEmitterUnitTests() } #endif +#ifndef TARGET_WASM genDefineTempLabel(skipLabel); +#endif instGen(INS_nop); instGen(INS_nop); instGen(INS_nop); From f4c184479b97d64ea03b2267e9318a52c45aa9f6 Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Wed, 13 May 2026 16:33:31 -0700 Subject: [PATCH 04/22] Use the same instrDesc for v128.const and i8x16.shuffle Update emitDispIns to support new SIMD instructions Properly emit block ... end in emitter unit tests for Wasm --- src/coreclr/jit/codegenlinear.cpp | 4 +- src/coreclr/jit/codegenwasm.cpp | 18 +++-- src/coreclr/jit/emit.h | 50 ++++---------- src/coreclr/jit/emitfmtswasm.h | 3 +- src/coreclr/jit/emitwasm.cpp | 108 +++++++++++++++++------------- src/coreclr/jit/emitwasm.h | 7 +- src/coreclr/jit/instrswasm.h | 4 +- 7 files changed, 93 insertions(+), 101 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index bb3d4f9bfb2bf4..08fc54e4d16f68 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2733,6 +2733,7 @@ void CodeGen::genEmitterUnitTests() genDefineTempLabel(skipLabel); WasmInterval* skipInterval = WasmInterval::NewBlock(m_compiler, skipLabel, skipLabel); this->wasmControlFlowStack->Push(skipInterval); + instGen(INS_block); #endif inst_JMP(EJ_jmp, skipLabel); @@ -2781,12 +2782,11 @@ void CodeGen::genEmitterUnitTests() if (unitTestSectionAll || (strstr(unitTestSection, "simd") != nullptr)) { genWasmEmitterUnitTestsSimd(); + instGen(INS_end); } #endif -#ifndef TARGET_WASM genDefineTempLabel(skipLabel); -#endif instGen(INS_nop); instGen(INS_nop); instGen(INS_nop); diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 02cd6b5afa7a92..3ff946d2f0b349 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -3402,26 +3402,24 @@ void CodeGen::genWasmEmitterUnitTestsSimd() // --- IF_V128_CONST: v128.const with 16 raw bytes --- const uint8_t
v128Bytes[16] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}; - emit->emitIns_V128Const(INS_v128_const, v128Bytes); + emit->emitIns_V128Imm(INS_v128_const, v128Bytes); // All-zeros and all-ones constants const uint8_t v128Zeros[16] = {0}; - emit->emitIns_V128Const(INS_v128_const, v128Zeros); + emit->emitIns_V128Imm(INS_v128_const, v128Zeros); const uint8_t v128Ones[16] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; - emit->emitIns_V128Const(INS_v128_const, v128Ones); + emit->emitIns_V128Imm(INS_v128_const, v128Ones); // --- IF_LANE: extract/replace lane instructions --- // i8x16 lanes (0..15) emit->emitIns_Lane(INS_i8x16_extract_lane_s, EA_1BYTE, 0); emit->emitIns_Lane(INS_i8x16_extract_lane_u, EA_1BYTE, 15); - emit->emitIns_Lane(INS_i8x16_replace_lane, EA_1BYTE, 7); // i16x8 lanes (0..7) emit->emitIns_Lane(INS_i16x8_extract_lane_s, EA_2BYTE, 0); emit->emitIns_Lane(INS_i16x8_extract_lane_u, EA_2BYTE, 7); - emit->emitIns_Lane(INS_i16x8_replace_lane, EA_2BYTE, 3); // i32x4 lanes (0..3) emit->emitIns_Lane(INS_i32x4_extract_lane, EA_4BYTE, 0); @@ -3432,8 +3430,8 @@ void CodeGen::genWasmEmitterUnitTestsSimd() emit->emitIns_Lane(INS_i64x2_replace_lane, EA_8BYTE, 1); // f32x4 lanes (0..3) - emit->emitIns_Lane(INS_f32x4_extract_lane, EA_4BYTE, 2); - emit->emitIns_Lane(INS_f32x4_replace_lane, EA_4BYTE, 1); + emit->emitIns_Lane(INS_f32x4_extract_lane, EA_4BYTE, 3); + emit->emitIns_Lane(INS_f32x4_replace_lane, EA_4BYTE, 0); // f64x2 lanes (0..1) emit->emitIns_Lane(INS_f64x2_extract_lane, EA_8BYTE, 0); @@ -3452,15 +3450,15 @@ void CodeGen::genWasmEmitterUnitTestsSimd() // --- IF_SHUFFLE: i8x16.shuffle with 16 lane-index bytes --- // Identity shuffle const uint8_t identityShuffle[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - emit->emitIns_Shuffle(INS_i8x16_shuffle, identityShuffle); + emit->emitIns_V128Imm(INS_i8x16_shuffle, identityShuffle); // Reverse 
bytes const uint8_t reverseShuffle[16] = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; - emit->emitIns_Shuffle(INS_i8x16_shuffle, reverseShuffle); + emit->emitIns_V128Imm(INS_i8x16_shuffle, reverseShuffle); // Cross-operand shuffle (indices 16..31 refer to the second operand) const uint8_t crossShuffle[16] = {0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31}; - emit->emitIns_Shuffle(INS_i8x16_shuffle, crossShuffle); + emit->emitIns_V128Imm(INS_i8x16_shuffle, crossShuffle); // --- IF_OPCODE: plain opcode SIMD instructions (representative sample) --- // Splat operations diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index f6a738a009b226..e4c84f1143ffc3 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1328,6 +1328,17 @@ class emitter { return _idInsFmt == IF_TRY_TABLE; } + + bool idIsV128Imm() const + { + return _idInsFmt == IF_V128; + } + + bool idIsMemargLaneImm() const + { + return _idInsFmt == IF_MEMARG_LANE; + } + #endif #ifdef TARGET_ARM64 @@ -2422,9 +2433,9 @@ class emitter } }; - struct instrDescV128Const : instrDesc + struct instrDescV128Imm : instrDesc { - instrDescV128Const() = delete; + instrDescV128Imm() = delete; uint8_t v128Bytes[16]; @@ -2440,41 +2451,6 @@ class emitter } }; - struct instrDescShuffle : instrDesc - { - instrDescShuffle() = delete; - - uint8_t shuffleLanes[16]; - - void idShuffleLanes(const uint8_t* lanes) - { - assert(lanes != nullptr); - memcpy(shuffleLanes, lanes, 16); - } - - const uint8_t* idShuffleLanes() const - { - return shuffleLanes; - } - }; - - struct instrDescLane : instrDesc - { - instrDescLane() = delete; - - uint8_t lane; - - void idLaneIdx(uint8_t idx) - { - lane = idx; - } - - uint8_t idLaneIdx() const - { - return lane; - } - }; - struct instrDescMemargLane : instrDescCns { instrDescMemargLane() = delete; diff --git a/src/coreclr/jit/emitfmtswasm.h b/src/coreclr/jit/emitfmtswasm.h index aa34ed6c06ef7b..1889e7cc6568d2 100644 --- a/src/coreclr/jit/emitfmtswasm.h +++ 
b/src/coreclr/jit/emitfmtswasm.h @@ -44,10 +44,9 @@ IF_DEF(CALL_INDIRECT, IS_NONE, NONE) // IF_DEF(TRY_TABLE, IS_NONE, NONE) // IF_DEF(CATCH_DECL, IS_NONE, NONE) // -IF_DEF(V128_CONST, IS_NONE, NONE) // <16 raw bytes> +IF_DEF(V128, IS_NONE, NONE) // <16 raw bytes> IF_DEF(LANE, IS_NONE, NONE) // IF_DEF(MEMARG_LANE, IS_NONE, NONE) // -IF_DEF(SHUFFLE, IS_NONE, NONE) // <16 lane-index bytes> #undef IF_DEF #endif // !DEFINE_ID_OPS diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index 768ad1a7bc691f..fb1627c3fdd680 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -366,6 +366,18 @@ unsigned int emitter::emitGetValTypeImmImm(const instrDesc* id) return static_cast(id)->imm; } +const uint8_t* emitter::emitGetV128ImmValue(const instrDesc* id) +{ + assert(id->idIsV128Imm()); + return static_cast(id)->v128Bytes; +} + +const uint8_t emitter::emitGetLaneImmValue(const instrDesc* id) +{ + assert(id->idIsMemargLaneImm()); + return static_cast(id)->lane; +} + //------------------------------------------------------------------------ // Packed SIMD instruction emit functions //------------------------------------------------------------------------ @@ -377,12 +389,12 @@ unsigned int emitter::emitGetValTypeImmImm(const instrDesc* id) // ins - instruction (INS_v128_const) // bytes - pointer to 16 bytes of constant data // -void emitter::emitIns_V128Const(instruction ins, const uint8_t* bytes) +void emitter::emitIns_V128Imm(instruction ins, const uint8_t* bytes) { assert(bytes != nullptr); - instrDescV128Const* id = static_cast(emitAllocAnyInstr(sizeof(instrDescV128Const), EA_16BYTE)); + instrDescV128Imm* id = static_cast(emitAllocAnyInstr(sizeof(instrDescV128Imm), EA_16BYTE)); id->idIns(ins); - id->idInsFmt(IF_V128_CONST); + id->idInsFmt(IF_V128); id->idV128Const(bytes); dispIns(id); @@ -399,10 +411,10 @@ void emitter::emitIns_V128Const(instruction ins, const uint8_t* bytes) // void emitter::emitIns_Lane(instruction ins, emitAttr 
attr, uint8_t laneIdx) { - instrDescLane* id = static_cast(emitAllocAnyInstr(sizeof(instrDescLane), attr)); + instrDesc* id = emitNewInstr(attr); id->idIns(ins); id->idInsFmt(IF_LANE); - id->idLaneIdx(laneIdx); + id->idSmallCns(laneIdx); dispIns(id); appendToCurIG(id); @@ -430,25 +442,6 @@ void emitter::emitIns_MemargLane(instruction ins, emitAttr attr, cnsval_ssize_t appendToCurIG(id); } -//------------------------------------------------------------------------ -// emitIns_Shuffle: Emit an i8x16.shuffle instruction with 16 lane-index bytes. -// -// Arguments: -// ins - instruction (INS_i8x16_shuffle) -// laneIndices - pointer to 16 lane index bytes -// -void emitter::emitIns_Shuffle(instruction ins, const uint8_t* laneIndices) -{ - assert(laneIndices != nullptr); - instrDescShuffle* id = static_cast(emitAllocAnyInstr(sizeof(instrDescShuffle), EA_16BYTE)); - id->idIns(ins); - id->idInsFmt(IF_SHUFFLE); - id->idShuffleLanes(laneIndices); - - dispIns(id); - appendToCurIG(id); -} - emitter::insFormat emitter::emitInsFormat(instruction ins) { static_assert(IF_COUNT < 255); @@ -517,17 +510,14 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const return sizeof(instrDescValTypeImm); } + // SIMD cases switch (id->idInsFmt()) { - case IF_V128_CONST: - return sizeof(instrDescV128Const); - case IF_SHUFFLE: - return sizeof(instrDescShuffle); - case IF_LANE: - return sizeof(instrDescLane); + case IF_V128: + return sizeof(instrDescV128Imm); case IF_MEMARG_LANE: return sizeof(instrDescMemargLane); - default: + default: // IF_LANE can fit in a standard instrDesc break; } @@ -661,7 +651,7 @@ unsigned emitter::instrDesc::idCodeSize() const size += SizeOfULEB128(emitGetInsSC(this)); // control flow stack offset break; } - case IF_V128_CONST: + case IF_V128: size += 16; // 16 raw bytes for the v128 constant break; case IF_LANE: @@ -675,9 +665,6 @@ unsigned emitter::instrDesc::idCodeSize() const size += 1; // 1 byte lane index break; } - case IF_SHUFFLE: - size += 16; // 16 
lane-index bytes - break; default: unreached(); } @@ -981,18 +968,19 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutputULEB128Padded(dst, (int64_t)size); break; } - case IF_V128_CONST: + case IF_V128: { dst += emitOutputOpcode(dst, ins); - const instrDescV128Const* idConst = static_cast(id); + const instrDescV128Imm* idConst = static_cast(id); dst += emitRawBytes(dst, idConst->idV128Const(), 16); break; } case IF_LANE: { dst += emitOutputOpcode(dst, ins); - const instrDescLane* idLane = static_cast(id); - dst += emitOutputByte(dst, idLane->idLaneIdx()); + cnsval_size_t laneIdx = emitGetInsSC(id); + assert(FitsIn(laneIdx)); + dst += emitOutputByte(dst, static_cast(laneIdx)); break; } case IF_MEMARG_LANE: @@ -1008,13 +996,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutputByte(dst, idMemLane->idLaneIdx()); break; } - case IF_SHUFFLE: - { - dst += emitOutputOpcode(dst, ins); - const instrDescShuffle* idShuf = static_cast(id); - dst += emitRawBytes(dst, idShuf->idShuffleLanes(), 16); - break; - } default: NYI_WASM("emitOutputInstr"); break; @@ -1264,7 +1245,9 @@ void emitter::emitDispIns( case IF_CODE_SIZE: { +#if FALSE FuncInfoDsc* const func = m_compiler->funGetFunc(emitCurIG->igFuncIdx); + assert(func != nullptr); emitLocation* const startLoc = func->startLoc; emitLocation* const endLoc = func->endLoc; @@ -1276,12 +1259,45 @@ void emitter::emitDispIns( printf(" %u", codeSize); } else +#endif { printf(" "); } } break; + case IF_V128: + { + const uint8_t* imm = emitGetV128ImmValue(id); + printf(" 0x"); + for (int i = 15; i >= 0; i--) + { + printf("%02x", imm[i]); + } + } + break; + + case IF_LANE: + { + cnsval_size_t lane = emitGetInsSC(id); + assert(FitsIn(lane)); + + printf(" %u", lane); + } + break; + + case IF_MEMARG_LANE: + { + unsigned log2align = emitGetAlignHintLog2(id); + cnsval_ssize_t offset = emitGetInsSC(id); + printf(" %u %llu", log2align, (uint64_t)offset); + 
dispLclVarInfoIfAny(); + + uint8_t lane = emitGetLaneImmValue(id); + printf(" %u", lane); + } + break; + default: unreached(); } diff --git a/src/coreclr/jit/emitwasm.h b/src/coreclr/jit/emitwasm.h index 411ca69c64e11a..b27a34d00a4e07 100644 --- a/src/coreclr/jit/emitwasm.h +++ b/src/coreclr/jit/emitwasm.h @@ -32,10 +32,9 @@ void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2) void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); // Packed SIMD instruction emit functions -void emitIns_V128Const(instruction ins, const uint8_t* bytes); +void emitIns_V128Imm(instruction ins, const uint8_t* bytes); void emitIns_Lane(instruction ins, emitAttr attr, uint8_t laneIdx); void emitIns_MemargLane(instruction ins, emitAttr attr, cnsval_ssize_t offset, uint8_t laneIdx); -void emitIns_Shuffle(instruction ins, const uint8_t* laneIndices); void emitAddressConstant(void* address); @@ -52,6 +51,10 @@ instrDesc* emitNewInstrValTypeImm(emitAttr attr, WasmValueType type, u static WasmValueType emitGetValTypeImmType(const instrDesc* id); static unsigned int emitGetValTypeImmImm(const instrDesc* id); +const uint8_t* emitGetV128ImmValue(const instrDesc* id); +const uint8_t emitGetLaneImmValue(const instrDesc* id); + + /************************************************************************/ /* Private members that deal with target-dependent instr. 
descriptors */ /************************************************************************/ diff --git a/src/coreclr/jit/instrswasm.h b/src/coreclr/jit/instrswasm.h index 6632c4e488742f..eb45b6f19e19b5 100644 --- a/src/coreclr/jit/instrswasm.h +++ b/src/coreclr/jit/instrswasm.h @@ -258,8 +258,8 @@ INST2(v128_load64_splat, "v128.load64_splat", 0, IF_MEMARG, 0xFD, 10) INST2(v128_store, "v128.store", 0, IF_MEMARG, 0xFD, 11) // v128.const and i8x16.shuffle (special formats) -INST2(v128_const, "v128.const", 0, IF_V128_CONST, 0xFD, 12) -INST2(i8x16_shuffle, "i8x16.shuffle", 0, IF_SHUFFLE, 0xFD, 13) +INST2(v128_const, "v128.const", 0, IF_V128, 0xFD, 12) +INST2(i8x16_shuffle, "i8x16.shuffle", 0, IF_V128, 0xFD, 13) // i8x16.swizzle (plain opcode) INST2(i8x16_swizzle, "i8x16.swizzle", 0, IF_OPCODE, 0xFD, 14) From 917d76344c2aeff6fb42cbc75d97fb671beda38a Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Thu, 14 May 2026 14:20:21 -0700 Subject: [PATCH 05/22] Properly emit branch to skip over emitter unit test instructions, and fix IF_MEMARG_LANE instruction format --- src/coreclr/jit/codegenlinear.cpp | 11 ++++++----- src/coreclr/jit/emitwasm.cpp | 23 +++++++++++++++++------ 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 08fc54e4d16f68..c6d5d01d481677 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2728,14 +2728,13 @@ void CodeGen::genEmitterUnitTests() // Jump over the generated tests as they are not intended to be run. 
BasicBlock* skipLabel = genCreateTempLabel(); -#ifdef TARGET_WASM - skipLabel->bbPreorderNum = m_compiler->compCurBB->bbPreorderNum + 1; +#ifndef TARGET_WASM + inst_JMP(EJ_jmp, skipLabel); +#else genDefineTempLabel(skipLabel); - WasmInterval* skipInterval = WasmInterval::NewBlock(m_compiler, skipLabel, skipLabel); - this->wasmControlFlowStack->Push(skipInterval); instGen(INS_block); + GetEmitter()->emitIns_J(INS_br, EA_4BYTE, 0, skipLabel); #endif - inst_JMP(EJ_jmp, skipLabel); // Add NOPs at the start and end for easier script parsing. instGen(INS_nop); @@ -2786,7 +2785,9 @@ void CodeGen::genEmitterUnitTests() } #endif +#ifndef TARGET_WASM genDefineTempLabel(skipLabel); +#endif instGen(INS_nop); instGen(INS_nop); instGen(INS_nop); diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index fb1627c3fdd680..3422ffee229f29 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -393,8 +393,10 @@ void emitter::emitIns_V128Imm(instruction ins, const uint8_t* bytes) { assert(bytes != nullptr); instrDescV128Imm* id = static_cast(emitAllocAnyInstr(sizeof(instrDescV128Imm), EA_16BYTE)); + insFormat fmt = emitInsFormat(ins); + + id->idInsFmt(fmt); id->idIns(ins); - id->idInsFmt(IF_V128); id->idV128Const(bytes); dispIns(id); @@ -411,10 +413,11 @@ void emitter::emitIns_V128Imm(instruction ins, const uint8_t* bytes) // void emitter::emitIns_Lane(instruction ins, emitAttr attr, uint8_t laneIdx) { - instrDesc* id = emitNewInstr(attr); + instrDesc* id = emitNewInstrSC(attr, laneIdx); + insFormat fmt = emitInsFormat(ins); + + id->idInsFmt(fmt); id->idIns(ins); - id->idInsFmt(IF_LANE); - id->idSmallCns(laneIdx); dispIns(id); appendToCurIG(id); @@ -433,9 +436,12 @@ void emitter::emitIns_MemargLane(instruction ins, emitAttr attr, cnsval_ssize_t { instrDescMemargLane* id = static_cast(emitAllocAnyInstr(sizeof(instrDescMemargLane), attr)); + insFormat fmt = emitInsFormat(ins); + + id->idInsFmt(fmt); id->idIns(ins); - 
id->idInsFmt(IF_MEMARG_LANE); id->idcCnsVal = offset; + id->idSetIsLargeCns(); id->idLaneIdx(laneIdx); dispIns(id); @@ -486,6 +492,11 @@ static bool HasOpcodePrefix(instruction ins) return GetOpcodePrefix(ins) != 0; } +inline static bool IsWasmSimdInstruction(instruction ins) +{ + return GetOpcodePrefix(ins) == 0xFD; +} + size_t emitter::emitSizeOfInsDsc(instrDesc* id) const { if (emitIsSmallInsDsc(id)) @@ -493,7 +504,7 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const return SMALL_IDSC_SIZE; } - if (id->idIsLargeCns()) + if (id->idIsLargeCns() && !IsWasmSimdInstruction(id->idIns())) { assert(!id->idIsLargeDsp()); assert(!id->idIsLargeCall()); From 1111af1bc7feabd78bb744c47cd783ca14e816fe Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Thu, 14 May 2026 14:33:47 -0700 Subject: [PATCH 06/22] Remove unneeded include --- src/coreclr/jit/codegenlinear.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index c6d5d01d481677..cc086cf3711432 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -18,7 +18,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "codegen.h" #if defined(TARGET_WASM) -#include "fgwasm.h" +class WasmInterval; #endif //------------------------------------------------------------------------ From 98f0ffe9772382d2632b63b08e79847cf158d056 Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Thu, 14 May 2026 14:43:28 -0700 Subject: [PATCH 07/22] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/coreclr/jit/emitwasm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/emitwasm.h b/src/coreclr/jit/emitwasm.h index b27a34d00a4e07..4af4bbabf06f44 100644 --- a/src/coreclr/jit/emitwasm.h +++ b/src/coreclr/jit/emitwasm.h @@ -54,7 +54,6 @@ static unsigned int emitGetValTypeImmImm(const 
instrDesc* id); const uint8_t* emitGetV128ImmValue(const instrDesc* id); const uint8_t emitGetLaneImmValue(const instrDesc* id); - /************************************************************************/ /* Private members that deal with target-dependent instr. descriptors */ /************************************************************************/ From bb42076534b113e3c981dcf7137ff7f9f31d602f Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Wed, 20 May 2026 10:28:29 -0700 Subject: [PATCH 08/22] jit-format --- src/coreclr/jit/emitwasm.cpp | 17 ++++++++--------- src/coreclr/jit/emitwasm.h | 1 - 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index 3422ffee229f29..dc35bc3c0daa5d 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -370,7 +370,7 @@ const uint8_t* emitter::emitGetV128ImmValue(const instrDesc* id) { assert(id->idIsV128Imm()); return static_cast(id)->v128Bytes; -} +} const uint8_t emitter::emitGetLaneImmValue(const instrDesc* id) { @@ -392,8 +392,8 @@ const uint8_t emitter::emitGetLaneImmValue(const instrDesc* id) void emitter::emitIns_V128Imm(instruction ins, const uint8_t* bytes) { assert(bytes != nullptr); - instrDescV128Imm* id = static_cast(emitAllocAnyInstr(sizeof(instrDescV128Imm), EA_16BYTE)); - insFormat fmt = emitInsFormat(ins); + instrDescV128Imm* id = static_cast(emitAllocAnyInstr(sizeof(instrDescV128Imm), EA_16BYTE)); + insFormat fmt = emitInsFormat(ins); id->idInsFmt(fmt); id->idIns(ins); @@ -413,7 +413,7 @@ void emitter::emitIns_V128Imm(instruction ins, const uint8_t* bytes) // void emitter::emitIns_Lane(instruction ins, emitAttr attr, uint8_t laneIdx) { - instrDesc* id = emitNewInstrSC(attr, laneIdx); + instrDesc* id = emitNewInstrSC(attr, laneIdx); insFormat fmt = emitInsFormat(ins); id->idInsFmt(fmt); @@ -434,9 +434,8 @@ void emitter::emitIns_Lane(instruction ins, emitAttr attr, uint8_t laneIdx) // void 
emitter::emitIns_MemargLane(instruction ins, emitAttr attr, cnsval_ssize_t offset, uint8_t laneIdx) { - instrDescMemargLane* id = - static_cast(emitAllocAnyInstr(sizeof(instrDescMemargLane), attr)); - insFormat fmt = emitInsFormat(ins); + instrDescMemargLane* id = static_cast(emitAllocAnyInstr(sizeof(instrDescMemargLane), attr)); + insFormat fmt = emitInsFormat(ins); id->idInsFmt(fmt); id->idIns(ins); @@ -998,8 +997,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) { dst += emitOutputOpcode(dst, ins); const instrDescMemargLane* idMemLane = static_cast(id); - uint64_t align = emitGetAlignHintLog2(id); - uint64_t offset = emitGetInsSC(id); + uint64_t align = emitGetAlignHintLog2(id); + uint64_t offset = emitGetInsSC(id); assert(align <= UINT32_MAX); assert(align < 64); dst += emitOutputULEB128(dst, align); diff --git a/src/coreclr/jit/emitwasm.h b/src/coreclr/jit/emitwasm.h index b27a34d00a4e07..4af4bbabf06f44 100644 --- a/src/coreclr/jit/emitwasm.h +++ b/src/coreclr/jit/emitwasm.h @@ -54,7 +54,6 @@ static unsigned int emitGetValTypeImmImm(const instrDesc* id); const uint8_t* emitGetV128ImmValue(const instrDesc* id); const uint8_t emitGetLaneImmValue(const instrDesc* id); - /************************************************************************/ /* Private members that deal with target-dependent instr. 
descriptors */ /************************************************************************/ From 0f6956a8088b075eb8e0fe9a75415d0eb83f6625 Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Wed, 20 May 2026 13:17:40 -0700 Subject: [PATCH 09/22] Fix some feedback --- src/coreclr/jit/codegenwasm.cpp | 4 ++-- src/coreclr/jit/emitwasm.cpp | 37 +++++++++++++++++++-------------- src/coreclr/jit/emitwasm.h | 2 +- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index c82de0bfc751e9..685a5ebf1833f0 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -3460,7 +3460,7 @@ void CodeGen::genWasmEmitterUnitTestsSimd() { emitter* emit = GetEmitter(); - // --- IF_V128_CONST: v128.const with 16 raw bytes --- + // --- IF_V128: v128.const with 16 raw bytes --- const uint8_t v128Bytes[16] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}; emit->emitIns_V128Imm(INS_v128_const, v128Bytes); @@ -3508,7 +3508,7 @@ void CodeGen::genWasmEmitterUnitTestsSimd() emit->emitIns_MemargLane(INS_v128_store32_lane, EA_4BYTE, 32, 1); emit->emitIns_MemargLane(INS_v128_store64_lane, EA_8BYTE, 256, 0); - // --- IF_SHUFFLE: i8x16.shuffle with 16 lane-index bytes --- + // --- IF_V128: i8x16.shuffle with 16 lane-index bytes --- // Identity shuffle const uint8_t identityShuffle[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; emit->emitIns_V128Imm(INS_i8x16_shuffle, identityShuffle); diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index e2a0c233096303..b58e8b7714abd1 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -380,7 +380,7 @@ const uint8_t* emitter::emitGetV128ImmValue(const instrDesc* id) return static_cast(id)->v128Bytes; } -const uint8_t emitter::emitGetLaneImmValue(const instrDesc* id) +uint8_t emitter::emitGetLaneImmValue(const instrDesc* id) { 
assert(id->idIsMemargLaneImm()); return static_cast(id)->lane; @@ -391,10 +391,10 @@ const uint8_t emitter::emitGetLaneImmValue(const instrDesc* id) //------------------------------------------------------------------------ //------------------------------------------------------------------------ -// emitIns_V128Const: Emit a v128.const instruction with 16 raw bytes. +// emitIns_V128Imm: Emit a packed SIMD instruction with a 16 byte vector immediate. // // Arguments: -// ins - instruction (INS_v128_const) +// ins - instruction (currently used with INS_v128_const and INS_i8x16_shuffle) // bytes - pointer to 16 bytes of constant data // void emitter::emitIns_V128Imm(instruction ins, const uint8_t* bytes) @@ -511,7 +511,22 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const return SMALL_IDSC_SIZE; } - if (id->idIsLargeCns() && !IsWasmSimdInstruction(id->idIns())) + if (IsWasmSimdInstruction(id->idIns())) + { + // Some (not all) SIMD instructions have larger instrDescs, + // and these cases are handled here. 
+ switch (id->idInsFmt()) + { + case IF_V128: + return sizeof(instrDescV128Imm); + case IF_MEMARG_LANE: + return sizeof(instrDescMemargLane); + default: // all other SIMD instructions can fit in a standard instrDesc + break; + } + } + + if (id->idIsLargeCns()) { assert(!id->idIsLargeDsp()); assert(!id->idIsLargeCall()); @@ -528,16 +543,6 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const return sizeof(instrDescValTypeImm); } - // SIMD cases - switch (id->idInsFmt()) - { - case IF_V128: - return sizeof(instrDescV128Imm); - case IF_MEMARG_LANE: - return sizeof(instrDescMemargLane); - default: // IF_LANE can fit in a standard instrDesc - break; - } return sizeof(instrDesc); } @@ -1337,7 +1342,7 @@ void emitter::emitDispIns( cnsval_size_t lane = emitGetInsSC(id); assert(FitsIn(lane)); - printf(" %u", lane); + printf(" %u", (uint8_t)lane); } break; @@ -1349,7 +1354,7 @@ void emitter::emitDispIns( dispLclVarInfoIfAny(); uint8_t lane = emitGetLaneImmValue(id); - printf(" %u", lane); + printf(" %u", (uint8_t)lane); } break; diff --git a/src/coreclr/jit/emitwasm.h b/src/coreclr/jit/emitwasm.h index 537839f166d193..12c9999bee916b 100644 --- a/src/coreclr/jit/emitwasm.h +++ b/src/coreclr/jit/emitwasm.h @@ -53,7 +53,7 @@ static WasmValueType emitGetValTypeImmType(const instrDesc* id); static unsigned int emitGetValTypeImmImm(const instrDesc* id); const uint8_t* emitGetV128ImmValue(const instrDesc* id); -const uint8_t emitGetLaneImmValue(const instrDesc* id); +uint8_t emitGetLaneImmValue(const instrDesc* id); /************************************************************************/ /* Private members that deal with target-dependent instr. 
descriptors */ From c505813827bd2bf46e860068c656f1c8198212af Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Wed, 20 May 2026 14:29:01 -0700 Subject: [PATCH 10/22] Add comment explaining how emitter unit tests are skipped on Wasm --- src/coreclr/jit/codegenlinear.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 54b6b488d85a36..0fc77a21e2b48e 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2731,6 +2731,9 @@ void CodeGen::genEmitterUnitTests() #ifndef TARGET_WASM inst_JMP(EJ_jmp, skipLabel); #else + // On Wasm, we need a pre-defined label to emit a branch, so we use skipLabel for this. + // We skip over the generated emitter test code by nesting it in a block where the + // first instruction is a "skip" branch to the end of the block. genDefineTempLabel(skipLabel); instGen(INS_block); GetEmitter()->emitIns_J(INS_br, EA_4BYTE, 0, skipLabel); From 12eecaba7c0a39ab506891ea8fa215a0029337e9 Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Wed, 20 May 2026 14:57:17 -0700 Subject: [PATCH 11/22] Make Wasm emitter SIMD unit tests emit type-valid Wasm --- src/coreclr/jit/codegenwasm.cpp | 228 ++++++++++++++++++++++++-------- 1 file changed, 174 insertions(+), 54 deletions(-) diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 685a5ebf1833f0..7c43f7a3878a2d 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -3453,113 +3453,233 @@ void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock) // load/store lane, and plain-opcode SIMD instructions). // // This is a temporary debug-only test that verifies the encoding paths -// do not assert or crash. The emitted instructions are not intended to -// form a valid Wasm program. +// do not assert or crash. Each instruction is emitted with valid stack +// operands so the resulting bytecode is semantically valid Wasm.
// void CodeGen::genWasmEmitterUnitTestsSimd() { emitter* emit = GetEmitter(); + // Helper macros to push typed constants, ensuring valid stack state. + // clang-format off +#define PUSH_V128(emit, bytes) (emit)->emitIns_V128Imm(INS_v128_const, (bytes)) +#define PUSH_I32(emit, val) (emit)->emitIns_I(INS_i32_const, EA_4BYTE, (val)) +#define PUSH_I64(emit, val) (emit)->emitIns_I(INS_i64_const, EA_8BYTE, (val)) +#define PUSH_F32(emit, val) (emit)->emitIns_I(INS_f32_const, EA_4BYTE, (val)) +#define PUSH_F64(emit, val) (emit)->emitIns_I(INS_f64_const, EA_8BYTE, (val)) +#define DROP(emit) (emit)->emitIns(INS_drop) + + // Unary v128 -> v128: push operand, emit instruction, drop result +#define TEST_UNARY_V128(emit, bytes, ins) \ + PUSH_V128(emit, bytes); \ + (emit)->emitIns(ins); \ + DROP(emit) + + // Binary v128 x v128 -> v128: push two operands, emit instruction, drop result +#define TEST_BINARY_V128(emit, bytes, ins) \ + PUSH_V128(emit, bytes); \ + PUSH_V128(emit, bytes); \ + (emit)->emitIns(ins); \ + DROP(emit) + + // Unary v128 -> i32 (e.g., bitmask, any_true, all_true) +#define TEST_V128_TO_I32(emit, bytes, ins) \ + PUSH_V128(emit, bytes); \ + (emit)->emitIns(ins); \ + DROP(emit) + + // Extract lane: v128 -> scalar (i32/i64/f32/f64), then drop +#define TEST_EXTRACT_LANE(emit, bytes, ins, attr, lane) \ + PUSH_V128(emit, bytes); \ + (emit)->emitIns_Lane(ins, attr, lane); \ + DROP(emit) + + // Replace lane: [v128, scalar] -> v128, then drop +#define TEST_REPLACE_LANE_I32(emit, bytes, ins, attr, lane) \ + PUSH_V128(emit, bytes); \ + PUSH_I32(emit, 42); \ + (emit)->emitIns_Lane(ins, attr, lane); \ + DROP(emit) + +#define TEST_REPLACE_LANE_I64(emit, bytes, ins, attr, lane) \ + PUSH_V128(emit, bytes); \ + PUSH_I64(emit, 42); \ + (emit)->emitIns_Lane(ins, attr, lane); \ + DROP(emit) + +#define TEST_REPLACE_LANE_F32(emit, bytes, ins, attr, lane) \ + PUSH_V128(emit, bytes); \ + PUSH_F32(emit, 0); \ + (emit)->emitIns_Lane(ins, attr, lane); \ + DROP(emit) + +#define 
TEST_REPLACE_LANE_F64(emit, bytes, ins, attr, lane) \ + PUSH_V128(emit, bytes); \ + PUSH_F64(emit, 0); \ + (emit)->emitIns_Lane(ins, attr, lane); \ + DROP(emit) + + // Load lane: [i32_addr, v128] -> v128, then drop +#define TEST_LOAD_LANE(emit, bytes, ins, attr, offset, lane) \ + PUSH_I32(emit, 0); \ + PUSH_V128(emit, bytes); \ + (emit)->emitIns_MemargLane(ins, attr, offset, lane); \ + DROP(emit) + + // Store lane: [i32_addr, v128] -> void +#define TEST_STORE_LANE(emit, bytes, ins, attr, offset, lane) \ + PUSH_I32(emit, 0); \ + PUSH_V128(emit, bytes); \ + (emit)->emitIns_MemargLane(ins, attr, offset, lane) + + // Shuffle: [v128, v128] -> v128, then drop +#define TEST_SHUFFLE(emit, bytes, shuffleBytes) \ + PUSH_V128(emit, bytes); \ + PUSH_V128(emit, bytes); \ + (emit)->emitIns_V128Imm(INS_i8x16_shuffle, shuffleBytes); \ + DROP(emit) + // clang-format on + // --- IF_V128: v128.const with 16 raw bytes --- const uint8_t v128Bytes[16] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}; - emit->emitIns_V128Imm(INS_v128_const, v128Bytes); + PUSH_V128(emit, v128Bytes); + DROP(emit); // All-zeros and all-ones constants const uint8_t v128Zeros[16] = {0}; - emit->emitIns_V128Imm(INS_v128_const, v128Zeros); + PUSH_V128(emit, v128Zeros); + DROP(emit); const uint8_t v128Ones[16] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; - emit->emitIns_V128Imm(INS_v128_const, v128Ones); + PUSH_V128(emit, v128Ones); + DROP(emit); // --- IF_LANE: extract/replace lane instructions --- // i8x16 lanes (0..15) - emit->emitIns_Lane(INS_i8x16_extract_lane_s, EA_1BYTE, 0); - emit->emitIns_Lane(INS_i8x16_extract_lane_u, EA_1BYTE, 15); + TEST_EXTRACT_LANE(emit, v128Ones, INS_i8x16_extract_lane_s, EA_1BYTE, 0); + TEST_EXTRACT_LANE(emit, v128Ones, INS_i8x16_extract_lane_u, EA_1BYTE, 15); + TEST_REPLACE_LANE_I32(emit, v128Ones, INS_i8x16_replace_lane, EA_1BYTE, 7); // i16x8 lanes (0..7) - 
emit->emitIns_Lane(INS_i16x8_extract_lane_s, EA_2BYTE, 0); - emit->emitIns_Lane(INS_i16x8_extract_lane_u, EA_2BYTE, 7); + TEST_EXTRACT_LANE(emit, v128Ones, INS_i16x8_extract_lane_s, EA_2BYTE, 0); + TEST_EXTRACT_LANE(emit, v128Ones, INS_i16x8_extract_lane_u, EA_2BYTE, 7); + TEST_REPLACE_LANE_I32(emit, v128Ones, INS_i16x8_replace_lane, EA_2BYTE, 3); // i32x4 lanes (0..3) - emit->emitIns_Lane(INS_i32x4_extract_lane, EA_4BYTE, 0); - emit->emitIns_Lane(INS_i32x4_replace_lane, EA_4BYTE, 3); + TEST_EXTRACT_LANE(emit, v128Ones, INS_i32x4_extract_lane, EA_4BYTE, 0); + TEST_REPLACE_LANE_I32(emit, v128Ones, INS_i32x4_replace_lane, EA_4BYTE, 3); // i64x2 lanes (0..1) - emit->emitIns_Lane(INS_i64x2_extract_lane, EA_8BYTE, 0); - emit->emitIns_Lane(INS_i64x2_replace_lane, EA_8BYTE, 1); + TEST_EXTRACT_LANE(emit, v128Ones, INS_i64x2_extract_lane, EA_8BYTE, 0); + TEST_REPLACE_LANE_I64(emit, v128Ones, INS_i64x2_replace_lane, EA_8BYTE, 1); // f32x4 lanes (0..3) - emit->emitIns_Lane(INS_f32x4_extract_lane, EA_4BYTE, 3); - emit->emitIns_Lane(INS_f32x4_replace_lane, EA_4BYTE, 0); + TEST_EXTRACT_LANE(emit, v128Ones, INS_f32x4_extract_lane, EA_4BYTE, 3); + TEST_REPLACE_LANE_F32(emit, v128Ones, INS_f32x4_replace_lane, EA_4BYTE, 0); // f64x2 lanes (0..1) - emit->emitIns_Lane(INS_f64x2_extract_lane, EA_8BYTE, 0); - emit->emitIns_Lane(INS_f64x2_replace_lane, EA_8BYTE, 1); + TEST_EXTRACT_LANE(emit, v128Ones, INS_f64x2_extract_lane, EA_8BYTE, 0); + TEST_REPLACE_LANE_F64(emit, v128Ones, INS_f64x2_replace_lane, EA_8BYTE, 1); // --- IF_MEMARG_LANE: load/store lane with memarg --- - emit->emitIns_MemargLane(INS_v128_load8_lane, EA_1BYTE, 0, 5); - emit->emitIns_MemargLane(INS_v128_load16_lane, EA_2BYTE, 16, 3); - emit->emitIns_MemargLane(INS_v128_load32_lane, EA_4BYTE, 64, 2); - emit->emitIns_MemargLane(INS_v128_load64_lane, EA_8BYTE, 128, 1); - emit->emitIns_MemargLane(INS_v128_store8_lane, EA_1BYTE, 0, 0); - emit->emitIns_MemargLane(INS_v128_store16_lane, EA_2BYTE, 8, 7); - 
emit->emitIns_MemargLane(INS_v128_store32_lane, EA_4BYTE, 32, 1); - emit->emitIns_MemargLane(INS_v128_store64_lane, EA_8BYTE, 256, 0); + TEST_LOAD_LANE(emit, v128Ones, INS_v128_load8_lane, EA_1BYTE, 0, 5); + TEST_LOAD_LANE(emit, v128Ones, INS_v128_load16_lane, EA_2BYTE, 16, 3); + TEST_LOAD_LANE(emit, v128Ones, INS_v128_load32_lane, EA_4BYTE, 64, 2); + TEST_LOAD_LANE(emit, v128Ones, INS_v128_load64_lane, EA_8BYTE, 128, 1); + TEST_STORE_LANE(emit, v128Ones, INS_v128_store8_lane, EA_1BYTE, 0, 0); + TEST_STORE_LANE(emit, v128Ones, INS_v128_store16_lane, EA_2BYTE, 8, 7); + TEST_STORE_LANE(emit, v128Ones, INS_v128_store32_lane, EA_4BYTE, 32, 1); + TEST_STORE_LANE(emit, v128Ones, INS_v128_store64_lane, EA_8BYTE, 256, 0); // --- IF_V128: i8x16.shuffle with 16 lane-index bytes --- // Identity shuffle const uint8_t identityShuffle[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - emit->emitIns_V128Imm(INS_i8x16_shuffle, identityShuffle); + TEST_SHUFFLE(emit, v128Bytes, identityShuffle); // Reverse bytes const uint8_t reverseShuffle[16] = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; - emit->emitIns_V128Imm(INS_i8x16_shuffle, reverseShuffle); + TEST_SHUFFLE(emit, v128Bytes, reverseShuffle); // Cross-operand shuffle (indices 16..31 refer to the second operand) const uint8_t crossShuffle[16] = {0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31}; - emit->emitIns_V128Imm(INS_i8x16_shuffle, crossShuffle); + TEST_SHUFFLE(emit, v128Bytes, crossShuffle); // --- IF_OPCODE: plain opcode SIMD instructions (representative sample) --- - // Splat operations + // Splat operations: push scalar, splat to v128, drop + PUSH_I32(emit, 1); emit->emitIns(INS_i8x16_splat); + DROP(emit); + + PUSH_I32(emit, 2); emit->emitIns(INS_i16x8_splat); + DROP(emit); + + PUSH_I32(emit, 3); emit->emitIns(INS_i32x4_splat); + DROP(emit); + + PUSH_I64(emit, 4); emit->emitIns(INS_i64x2_splat); + DROP(emit); + + PUSH_F32(emit, 0); emit->emitIns(INS_f32x4_splat); + DROP(emit); + + 
PUSH_F64(emit, 0); emit->emitIns(INS_f64x2_splat); + DROP(emit); - // Swizzle - emit->emitIns(INS_i8x16_swizzle); + // Swizzle: [v128, v128] -> v128 + TEST_BINARY_V128(emit, v128Ones, INS_i8x16_swizzle); - // A few comparisons - emit->emitIns(INS_i8x16_eq); - emit->emitIns(INS_i32x4_ne); - emit->emitIns(INS_f64x2_lt); + // A few comparisons: [v128, v128] -> v128 + TEST_BINARY_V128(emit, v128Ones, INS_i8x16_eq); + TEST_BINARY_V128(emit, v128Ones, INS_i32x4_ne); + TEST_BINARY_V128(emit, v128Ones, INS_f64x2_lt); // A few arithmetic ops - emit->emitIns(INS_i8x16_add); - emit->emitIns(INS_i32x4_mul); - emit->emitIns(INS_f32x4_sqrt); - emit->emitIns(INS_f64x2_neg); + TEST_BINARY_V128(emit, v128Ones, INS_i8x16_add); + TEST_BINARY_V128(emit, v128Ones, INS_i32x4_mul); + TEST_UNARY_V128(emit, v128Ones, INS_f32x4_sqrt); + TEST_UNARY_V128(emit, v128Ones, INS_f64x2_neg); // Bitwise ops - emit->emitIns(INS_v128_not); - emit->emitIns(INS_v128_and); - emit->emitIns(INS_v128_or); - emit->emitIns(INS_v128_xor); - emit->emitIns(INS_v128_andnot); - - // Bitmask / any_true / all_true - emit->emitIns(INS_v128_any_true); - emit->emitIns(INS_i8x16_all_true); - emit->emitIns(INS_i32x4_bitmask); - - // Conversion operations - emit->emitIns(INS_f32x4_convert_s_i32x4); - emit->emitIns(INS_f64x2_convert_low_u_i32x4); - emit->emitIns(INS_i32x4_trunc_sat_s_f32x4); + TEST_UNARY_V128(emit, v128Ones, INS_v128_not); + TEST_BINARY_V128(emit, v128Ones, INS_v128_and); + TEST_BINARY_V128(emit, v128Ones, INS_v128_or); + TEST_BINARY_V128(emit, v128Ones, INS_v128_xor); + TEST_BINARY_V128(emit, v128Ones, INS_v128_andnot); + + // Bitmask / any_true / all_true: v128 -> i32 + TEST_V128_TO_I32(emit, v128Ones, INS_v128_any_true); + TEST_V128_TO_I32(emit, v128Ones, INS_i8x16_all_true); + TEST_V128_TO_I32(emit, v128Ones, INS_i32x4_bitmask); + + // Conversion operations: v128 -> v128 + TEST_UNARY_V128(emit, v128Ones, INS_f32x4_convert_s_i32x4); + TEST_UNARY_V128(emit, v128Ones, INS_f64x2_convert_low_u_i32x4); + 
TEST_UNARY_V128(emit, v128Ones, INS_i32x4_trunc_sat_s_f32x4); + +#undef PUSH_V128 +#undef PUSH_I32 +#undef PUSH_I64 +#undef PUSH_F32 +#undef PUSH_F64 +#undef DROP +#undef TEST_UNARY_V128 +#undef TEST_BINARY_V128 +#undef TEST_V128_TO_I32 +#undef TEST_EXTRACT_LANE +#undef TEST_REPLACE_LANE_I32 +#undef TEST_REPLACE_LANE_I64 +#undef TEST_REPLACE_LANE_F32 +#undef TEST_REPLACE_LANE_F64 +#undef TEST_LOAD_LANE +#undef TEST_STORE_LANE +#undef TEST_SHUFFLE } #endif // defined(DEBUG) From b5f7f727f6cb26ee39464b961e5fac2f69c2c785 Mon Sep 17 00:00:00 2001 From: adamperlin Date: Wed, 20 May 2026 15:27:27 -0700 Subject: [PATCH 12/22] Address PR review comments for Wasm SIMD encodings - Remove redundant assert(align <= UINT32_MAX) since assert(align < 64) implies it - Add format asserts to emitIns_V128Imm, emitIns_Lane, emitIns_MemargLane - Add non-negative offset assert in emitIns_MemargLane - Use emitIns_BlockTy(INS_block) instead of instGen(INS_block) in emitter unit tests - Move INS_end outside conditional so Wasm block is always properly terminated Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/codegenlinear.cpp | 4 ++-- src/coreclr/jit/emitwasm.cpp | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 0fc77a21e2b48e..c09da7df8ad4a8 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2735,7 +2735,7 @@ void CodeGen::genEmitterUnitTests() // We skip over the generated emitter test code by nesting it in a block where the // first instruction is a "skip" branch to the end of the block. 
genDefineTempLabel(skipLabel); - instGen(INS_block); + GetEmitter()->emitIns_BlockTy(INS_block); GetEmitter()->emitIns_J(INS_br, EA_4BYTE, 0, skipLabel); #endif @@ -2792,8 +2792,8 @@ void CodeGen::genEmitterUnitTests() if (unitTestSectionAll || (strstr(unitTestSection, "simd") != nullptr)) { genWasmEmitterUnitTestsSimd(); - instGen(INS_end); } + instGen(INS_end); #endif #ifndef TARGET_WASM diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index b58e8b7714abd1..2ca57c28f676d6 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -402,6 +402,7 @@ void emitter::emitIns_V128Imm(instruction ins, const uint8_t* bytes) assert(bytes != nullptr); instrDescV128Imm* id = static_cast(emitAllocAnyInstr(sizeof(instrDescV128Imm), EA_16BYTE)); insFormat fmt = emitInsFormat(ins); + assert(fmt == IF_V128); id->idInsFmt(fmt); id->idIns(ins); @@ -423,6 +424,7 @@ void emitter::emitIns_Lane(instruction ins, emitAttr attr, uint8_t laneIdx) { instrDesc* id = emitNewInstrSC(attr, laneIdx); insFormat fmt = emitInsFormat(ins); + assert(fmt == IF_LANE); id->idInsFmt(fmt); id->idIns(ins); @@ -444,6 +446,8 @@ void emitter::emitIns_MemargLane(instruction ins, emitAttr attr, cnsval_ssize_t { instrDescMemargLane* id = static_cast(emitAllocAnyInstr(sizeof(instrDescMemargLane), attr)); insFormat fmt = emitInsFormat(ins); + assert(fmt == IF_MEMARG_LANE); + assert(offset >= 0); id->idInsFmt(fmt); id->idIns(ins); @@ -1034,7 +1038,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) const instrDescMemargLane* idMemLane = static_cast(id); uint64_t align = emitGetAlignHintLog2(id); uint64_t offset = emitGetInsSC(id); - assert(align <= UINT32_MAX); assert(align < 64); dst += emitOutputULEB128(dst, align); dst += emitOutputULEB128(dst, offset); From 58727a462657f31d0c0694f7605b88489717d0eb Mon Sep 17 00:00:00 2001 From: adamperlin Date: Wed, 20 May 2026 15:31:26 -0700 Subject: [PATCH 13/22] jit-format --- src/coreclr/jit/emitwasm.cpp | 
1 - src/coreclr/jit/emitwasm.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index 2ca57c28f676d6..72052240842510 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -547,7 +547,6 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const return sizeof(instrDescValTypeImm); } - return sizeof(instrDesc); } diff --git a/src/coreclr/jit/emitwasm.h b/src/coreclr/jit/emitwasm.h index 12c9999bee916b..b9435a9372740b 100644 --- a/src/coreclr/jit/emitwasm.h +++ b/src/coreclr/jit/emitwasm.h @@ -53,7 +53,7 @@ static WasmValueType emitGetValTypeImmType(const instrDesc* id); static unsigned int emitGetValTypeImmImm(const instrDesc* id); const uint8_t* emitGetV128ImmValue(const instrDesc* id); -uint8_t emitGetLaneImmValue(const instrDesc* id); +uint8_t emitGetLaneImmValue(const instrDesc* id); /************************************************************************/ /* Private members that deal with target-dependent instr. 
descriptors */ From bab42c8629b6780d01f0992e69eaf5283d02a21f Mon Sep 17 00:00:00 2001 From: adamperlin Date: Wed, 20 May 2026 17:42:43 -0700 Subject: [PATCH 14/22] More feedback --- src/coreclr/jit/emitwasm.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index 72052240842510..3b5da70f79ebf2 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -1019,8 +1019,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_V128: { dst += emitOutputOpcode(dst, ins); - const instrDescV128Imm* idConst = static_cast(id); - dst += emitRawBytes(dst, idConst->idV128Const(), 16); + const uint8_t* v128Value = emitGetV128ImmValue(id); + dst += emitRawBytes(dst, v128Value, 16); break; } case IF_LANE: @@ -1034,13 +1034,13 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_MEMARG_LANE: { dst += emitOutputOpcode(dst, ins); - const instrDescMemargLane* idMemLane = static_cast(id); - uint64_t align = emitGetAlignHintLog2(id); - uint64_t offset = emitGetInsSC(id); + uint8_t laneIdx = emitGetLaneImmValue(id); + uint64_t align = emitGetAlignHintLog2(id); + uint64_t offset = emitGetInsSC(id); assert(align < 64); dst += emitOutputULEB128(dst, align); dst += emitOutputULEB128(dst, offset); - dst += emitOutputByte(dst, idMemLane->idLaneIdx()); + dst += emitOutputByte(dst, laneIdx); break; } default: From e8c3a23114b8f93fe4df31a36deb3bb1bf3c14fe Mon Sep 17 00:00:00 2001 From: adamperlin Date: Thu, 21 May 2026 12:58:15 -0700 Subject: [PATCH 15/22] Add Wasm emitter bounds checks for SIMD lane indices Clean up macros in wasm simd emitter unit tests --- src/coreclr/jit/codegeninterface.h | 6 + src/coreclr/jit/codegenwasm.cpp | 246 ++++++++++++++--------------- src/coreclr/jit/emitwasm.cpp | 46 +++++- src/coreclr/jit/emitwasm.h | 1 + src/coreclr/jit/instr.cpp | 7 + src/coreclr/jit/instrswasm.h | 48 +++--- 6 files changed, 202 
insertions(+), 152 deletions(-) diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index ea160060717233..2cecb89cd278c4 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -212,6 +212,12 @@ class CodeGenInterface bool IsEmbeddedBroadcastEnabled(instruction ins, GenTree* op); #endif // TARGET_XARCH +#if defined(TARGET_WASM) +// On wasm, we store the simd element size in the upper 7 bits of the instruction info. +// The lower bit is reserved as an FP flag. +#define INST_INFO_ELEM_SIZE_SHIFT 0x1 + static uint8_t instSimdElemSize(instruction ins); +#endif //------------------------------------------------------------------------- // Liveness-related fields & methods public: diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 7c43f7a3878a2d..d0f6835a368e16 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -3462,206 +3462,200 @@ void CodeGen::genWasmEmitterUnitTestsSimd() // Helper macros to push typed constants, ensuring valid stack state. 
// clang-format off -#define PUSH_V128(emit, bytes) (emit)->emitIns_V128Imm(INS_v128_const, (bytes)) -#define PUSH_I32(emit, val) (emit)->emitIns_I(INS_i32_const, EA_4BYTE, (val)) -#define PUSH_I64(emit, val) (emit)->emitIns_I(INS_i64_const, EA_8BYTE, (val)) -#define PUSH_F32(emit, val) (emit)->emitIns_I(INS_f32_const, EA_4BYTE, (val)) -#define PUSH_F64(emit, val) (emit)->emitIns_I(INS_f64_const, EA_8BYTE, (val)) -#define DROP(emit) (emit)->emitIns(INS_drop) - - // Unary v128 -> v128: push operand, emit instruction, drop result -#define TEST_UNARY_V128(emit, bytes, ins) \ - PUSH_V128(emit, bytes); \ - (emit)->emitIns(ins); \ - DROP(emit) +#define PUSH_V128(bytes) emit->emitIns_V128Imm(INS_v128_const, (bytes)) +#define PUSH_I32(val) emit->emitIns_I(INS_i32_const, EA_4BYTE, (val)) +#define PUSH_I64(val) emit->emitIns_I(INS_i64_const, EA_8BYTE, (val)) +#define PUSH_F32(val) emit->emitIns_I(INS_f32_const, EA_4BYTE, (val)) +#define PUSH_F64(val) emit->emitIns_I(INS_f64_const, EA_8BYTE, (val)) +#define DROP emit->emitIns(INS_drop) + + // Unary v128 -> result: push operand, emit instruction, drop result +#define TEST_UNARY_V128(bytes, ins) \ + PUSH_V128(bytes); \ + emit->emitIns(ins); \ + DROP // Binary v128 x v128 -> v128: push two operands, emit instruction, drop result -#define TEST_BINARY_V128(emit, bytes, ins) \ - PUSH_V128(emit, bytes); \ - PUSH_V128(emit, bytes); \ - (emit)->emitIns(ins); \ - DROP(emit) - - // Unary v128 -> i32 (e.g., bitmask, any_true, all_true) -#define TEST_V128_TO_I32(emit, bytes, ins) \ - PUSH_V128(emit, bytes); \ - (emit)->emitIns(ins); \ - DROP(emit) +#define TEST_BINARY_V128(bytes, ins) \ + PUSH_V128(bytes); \ + PUSH_V128(bytes); \ + emit->emitIns(ins); \ + DROP // Extract lane: v128 -> scalar (i32/i64/f32/f64), then drop -#define TEST_EXTRACT_LANE(emit, bytes, ins, attr, lane) \ - PUSH_V128(emit, bytes); \ - (emit)->emitIns_Lane(ins, attr, lane); \ - DROP(emit) +#define TEST_EXTRACT_LANE(bytes, ins, attr, lane) \ + PUSH_V128(bytes); \ + 
emit->emitIns_Lane(ins, attr, lane); \ + DROP // Replace lane: [v128, scalar] -> v128, then drop -#define TEST_REPLACE_LANE_I32(emit, bytes, ins, attr, lane) \ - PUSH_V128(emit, bytes); \ - PUSH_I32(emit, 42); \ - (emit)->emitIns_Lane(ins, attr, lane); \ - DROP(emit) - -#define TEST_REPLACE_LANE_I64(emit, bytes, ins, attr, lane) \ - PUSH_V128(emit, bytes); \ - PUSH_I64(emit, 42); \ - (emit)->emitIns_Lane(ins, attr, lane); \ - DROP(emit) - -#define TEST_REPLACE_LANE_F32(emit, bytes, ins, attr, lane) \ - PUSH_V128(emit, bytes); \ - PUSH_F32(emit, 0); \ - (emit)->emitIns_Lane(ins, attr, lane); \ - DROP(emit) - -#define TEST_REPLACE_LANE_F64(emit, bytes, ins, attr, lane) \ - PUSH_V128(emit, bytes); \ - PUSH_F64(emit, 0); \ - (emit)->emitIns_Lane(ins, attr, lane); \ - DROP(emit) +#define TEST_REPLACE_LANE_I32(bytes, ins, attr, lane) \ + PUSH_V128(bytes); \ + PUSH_I32(42); \ + emit->emitIns_Lane(ins, attr, lane); \ + DROP + +#define TEST_REPLACE_LANE_I64(bytes, ins, attr, lane) \ + PUSH_V128(bytes); \ + PUSH_I64(42); \ + emit->emitIns_Lane(ins, attr, lane); \ + DROP + +#define TEST_REPLACE_LANE_F32(bytes, ins, attr, lane) \ + PUSH_V128(bytes); \ + PUSH_F32(0); \ + emit->emitIns_Lane(ins, attr, lane); \ + DROP + +#define TEST_REPLACE_LANE_F64(bytes, ins, attr, lane) \ + PUSH_V128(bytes); \ + PUSH_F64(0); \ + emit->emitIns_Lane(ins, attr, lane); \ + DROP // Load lane: [i32_addr, v128] -> v128, then drop -#define TEST_LOAD_LANE(emit, bytes, ins, attr, offset, lane) \ - PUSH_I32(emit, 0); \ - PUSH_V128(emit, bytes); \ - (emit)->emitIns_MemargLane(ins, attr, offset, lane); \ - DROP(emit) +#define TEST_LOAD_LANE(bytes, ins, attr, offset, lane) \ + PUSH_I32(0); \ + PUSH_V128(bytes); \ + emit->emitIns_MemargLane(ins, attr, offset, lane); \ + DROP // Store lane: [i32_addr, v128] -> void -#define TEST_STORE_LANE(emit, bytes, ins, attr, offset, lane) \ - PUSH_I32(emit, 0); \ - PUSH_V128(emit, bytes); \ - (emit)->emitIns_MemargLane(ins, attr, offset, lane) +#define 
TEST_STORE_LANE(bytes, ins, attr, offset, lane) \ + PUSH_I32(0); \ + PUSH_V128(bytes); \ + emit->emitIns_MemargLane(ins, attr, offset, lane) // Shuffle: [v128, v128] -> v128, then drop -#define TEST_SHUFFLE(emit, bytes, shuffleBytes) \ - PUSH_V128(emit, bytes); \ - PUSH_V128(emit, bytes); \ - (emit)->emitIns_V128Imm(INS_i8x16_shuffle, shuffleBytes); \ - DROP(emit) +#define TEST_SHUFFLE(bytes, shuffleBytes) \ + PUSH_V128(bytes); \ + PUSH_V128(bytes); \ + emit->emitIns_V128Imm(INS_i8x16_shuffle, shuffleBytes); \ + DROP // clang-format on // --- IF_V128: v128.const with 16 raw bytes --- const uint8_t v128Bytes[16] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F}; - PUSH_V128(emit, v128Bytes); - DROP(emit); + PUSH_V128(v128Bytes); + DROP; // All-zeros and all-ones constants const uint8_t v128Zeros[16] = {0}; - PUSH_V128(emit, v128Zeros); - DROP(emit); + PUSH_V128(v128Zeros); + DROP; const uint8_t v128Ones[16] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}; - PUSH_V128(emit, v128Ones); - DROP(emit); + PUSH_V128(v128Ones); + DROP; // --- IF_LANE: extract/replace lane instructions --- // i8x16 lanes (0..15) - TEST_EXTRACT_LANE(emit, v128Ones, INS_i8x16_extract_lane_s, EA_1BYTE, 0); - TEST_EXTRACT_LANE(emit, v128Ones, INS_i8x16_extract_lane_u, EA_1BYTE, 15); - TEST_REPLACE_LANE_I32(emit, v128Ones, INS_i8x16_replace_lane, EA_1BYTE, 7); + TEST_EXTRACT_LANE(v128Ones, INS_i8x16_extract_lane_s, EA_1BYTE, 0); + TEST_EXTRACT_LANE(v128Ones, INS_i8x16_extract_lane_u, EA_1BYTE, 15); + TEST_REPLACE_LANE_I32(v128Ones, INS_i8x16_replace_lane, EA_1BYTE, 7); // i16x8 lanes (0..7) - TEST_EXTRACT_LANE(emit, v128Ones, INS_i16x8_extract_lane_s, EA_2BYTE, 0); - TEST_EXTRACT_LANE(emit, v128Ones, INS_i16x8_extract_lane_u, EA_2BYTE, 7); - TEST_REPLACE_LANE_I32(emit, v128Ones, INS_i16x8_replace_lane, EA_2BYTE, 3); + TEST_EXTRACT_LANE(v128Ones, INS_i16x8_extract_lane_s, EA_2BYTE, 0); + 
TEST_EXTRACT_LANE(v128Ones, INS_i16x8_extract_lane_u, EA_2BYTE, 7); + TEST_REPLACE_LANE_I32(v128Ones, INS_i16x8_replace_lane, EA_2BYTE, 3); // i32x4 lanes (0..3) - TEST_EXTRACT_LANE(emit, v128Ones, INS_i32x4_extract_lane, EA_4BYTE, 0); - TEST_REPLACE_LANE_I32(emit, v128Ones, INS_i32x4_replace_lane, EA_4BYTE, 3); + TEST_EXTRACT_LANE(v128Ones, INS_i32x4_extract_lane, EA_4BYTE, 0); + TEST_REPLACE_LANE_I32(v128Ones, INS_i32x4_replace_lane, EA_4BYTE, 3); // i64x2 lanes (0..1) - TEST_EXTRACT_LANE(emit, v128Ones, INS_i64x2_extract_lane, EA_8BYTE, 0); - TEST_REPLACE_LANE_I64(emit, v128Ones, INS_i64x2_replace_lane, EA_8BYTE, 1); + TEST_EXTRACT_LANE(v128Ones, INS_i64x2_extract_lane, EA_8BYTE, 0); + TEST_REPLACE_LANE_I64(v128Ones, INS_i64x2_replace_lane, EA_8BYTE, 1); // f32x4 lanes (0..3) - TEST_EXTRACT_LANE(emit, v128Ones, INS_f32x4_extract_lane, EA_4BYTE, 3); - TEST_REPLACE_LANE_F32(emit, v128Ones, INS_f32x4_replace_lane, EA_4BYTE, 0); + TEST_EXTRACT_LANE(v128Ones, INS_f32x4_extract_lane, EA_4BYTE, 3); + TEST_REPLACE_LANE_F32(v128Ones, INS_f32x4_replace_lane, EA_4BYTE, 0); // f64x2 lanes (0..1) - TEST_EXTRACT_LANE(emit, v128Ones, INS_f64x2_extract_lane, EA_8BYTE, 0); - TEST_REPLACE_LANE_F64(emit, v128Ones, INS_f64x2_replace_lane, EA_8BYTE, 1); + TEST_EXTRACT_LANE(v128Ones, INS_f64x2_extract_lane, EA_8BYTE, 0); + TEST_REPLACE_LANE_F64(v128Ones, INS_f64x2_replace_lane, EA_8BYTE, 1); // --- IF_MEMARG_LANE: load/store lane with memarg --- - TEST_LOAD_LANE(emit, v128Ones, INS_v128_load8_lane, EA_1BYTE, 0, 5); - TEST_LOAD_LANE(emit, v128Ones, INS_v128_load16_lane, EA_2BYTE, 16, 3); - TEST_LOAD_LANE(emit, v128Ones, INS_v128_load32_lane, EA_4BYTE, 64, 2); - TEST_LOAD_LANE(emit, v128Ones, INS_v128_load64_lane, EA_8BYTE, 128, 1); - TEST_STORE_LANE(emit, v128Ones, INS_v128_store8_lane, EA_1BYTE, 0, 0); - TEST_STORE_LANE(emit, v128Ones, INS_v128_store16_lane, EA_2BYTE, 8, 7); - TEST_STORE_LANE(emit, v128Ones, INS_v128_store32_lane, EA_4BYTE, 32, 1); - TEST_STORE_LANE(emit, v128Ones, 
INS_v128_store64_lane, EA_8BYTE, 256, 0); + TEST_LOAD_LANE(v128Ones, INS_v128_load8_lane, EA_1BYTE, 0, 5); + TEST_LOAD_LANE(v128Ones, INS_v128_load16_lane, EA_2BYTE, 16, 3); + TEST_LOAD_LANE(v128Ones, INS_v128_load32_lane, EA_4BYTE, 64, 2); + TEST_LOAD_LANE(v128Ones, INS_v128_load64_lane, EA_8BYTE, 128, 1); + TEST_STORE_LANE(v128Ones, INS_v128_store8_lane, EA_1BYTE, 0, 0); + TEST_STORE_LANE(v128Ones, INS_v128_store16_lane, EA_2BYTE, 8, 7); + TEST_STORE_LANE(v128Ones, INS_v128_store32_lane, EA_4BYTE, 32, 1); + TEST_STORE_LANE(v128Ones, INS_v128_store64_lane, EA_8BYTE, 256, 0); // --- IF_V128: i8x16.shuffle with 16 lane-index bytes --- // Identity shuffle const uint8_t identityShuffle[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - TEST_SHUFFLE(emit, v128Bytes, identityShuffle); + TEST_SHUFFLE(v128Bytes, identityShuffle); // Reverse bytes const uint8_t reverseShuffle[16] = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; - TEST_SHUFFLE(emit, v128Bytes, reverseShuffle); + TEST_SHUFFLE(v128Bytes, reverseShuffle); // Cross-operand shuffle (indices 16..31 refer to the second operand) const uint8_t crossShuffle[16] = {0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31}; - TEST_SHUFFLE(emit, v128Bytes, crossShuffle); + TEST_SHUFFLE(v128Bytes, crossShuffle); // --- IF_OPCODE: plain opcode SIMD instructions (representative sample) --- // Splat operations: push scalar, splat to v128, drop - PUSH_I32(emit, 1); + PUSH_I32(1); emit->emitIns(INS_i8x16_splat); - DROP(emit); + DROP; - PUSH_I32(emit, 2); + PUSH_I32(2); emit->emitIns(INS_i16x8_splat); - DROP(emit); + DROP; - PUSH_I32(emit, 3); + PUSH_I32(3); emit->emitIns(INS_i32x4_splat); - DROP(emit); + DROP; - PUSH_I64(emit, 4); + PUSH_I64(4); emit->emitIns(INS_i64x2_splat); - DROP(emit); + DROP; - PUSH_F32(emit, 0); + PUSH_F32(0); emit->emitIns(INS_f32x4_splat); - DROP(emit); + DROP; - PUSH_F64(emit, 0); + PUSH_F64(0); emit->emitIns(INS_f64x2_splat); - DROP(emit); + DROP; // Swizzle: [v128, v128] -> 
v128 - TEST_BINARY_V128(emit, v128Ones, INS_i8x16_swizzle); + TEST_BINARY_V128(v128Ones, INS_i8x16_swizzle); // A few comparisons: [v128, v128] -> v128 - TEST_BINARY_V128(emit, v128Ones, INS_i8x16_eq); - TEST_BINARY_V128(emit, v128Ones, INS_i32x4_ne); - TEST_BINARY_V128(emit, v128Ones, INS_f64x2_lt); + TEST_BINARY_V128(v128Ones, INS_i8x16_eq); + TEST_BINARY_V128(v128Ones, INS_i32x4_ne); + TEST_BINARY_V128(v128Ones, INS_f64x2_lt); // A few arithmetic ops - TEST_BINARY_V128(emit, v128Ones, INS_i8x16_add); - TEST_BINARY_V128(emit, v128Ones, INS_i32x4_mul); - TEST_UNARY_V128(emit, v128Ones, INS_f32x4_sqrt); - TEST_UNARY_V128(emit, v128Ones, INS_f64x2_neg); + TEST_BINARY_V128(v128Ones, INS_i8x16_add); + TEST_BINARY_V128(v128Ones, INS_i32x4_mul); + TEST_UNARY_V128(v128Ones, INS_f32x4_sqrt); + TEST_UNARY_V128(v128Ones, INS_f64x2_neg); // Bitwise ops - TEST_UNARY_V128(emit, v128Ones, INS_v128_not); - TEST_BINARY_V128(emit, v128Ones, INS_v128_and); - TEST_BINARY_V128(emit, v128Ones, INS_v128_or); - TEST_BINARY_V128(emit, v128Ones, INS_v128_xor); - TEST_BINARY_V128(emit, v128Ones, INS_v128_andnot); + TEST_UNARY_V128(v128Ones, INS_v128_not); + TEST_BINARY_V128(v128Ones, INS_v128_and); + TEST_BINARY_V128(v128Ones, INS_v128_or); + TEST_BINARY_V128(v128Ones, INS_v128_xor); + TEST_BINARY_V128(v128Ones, INS_v128_andnot); // Bitmask / any_true / all_true: v128 -> i32 - TEST_V128_TO_I32(emit, v128Ones, INS_v128_any_true); - TEST_V128_TO_I32(emit, v128Ones, INS_i8x16_all_true); - TEST_V128_TO_I32(emit, v128Ones, INS_i32x4_bitmask); + TEST_UNARY_V128(v128Ones, INS_v128_any_true); + TEST_UNARY_V128(v128Ones, INS_i8x16_all_true); + TEST_UNARY_V128(v128Ones, INS_i32x4_bitmask); // Conversion operations: v128 -> v128 - TEST_UNARY_V128(emit, v128Ones, INS_f32x4_convert_s_i32x4); - TEST_UNARY_V128(emit, v128Ones, INS_f64x2_convert_low_u_i32x4); - TEST_UNARY_V128(emit, v128Ones, INS_i32x4_trunc_sat_s_f32x4); + TEST_UNARY_V128(v128Ones, INS_f32x4_convert_s_i32x4); + TEST_UNARY_V128(v128Ones, 
INS_f64x2_convert_low_u_i32x4); + TEST_UNARY_V128(v128Ones, INS_i32x4_trunc_sat_s_f32x4); #undef PUSH_V128 #undef PUSH_I32 diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index 3b5da70f79ebf2..fd824957406e44 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -17,6 +17,44 @@ }; // clang-format on +bool isValidSimdElemSize(unsigned elemSize) +{ + // Valid SIMD configurations are i8x16, i16x8, i32x4, i64x2, f32x4, f64x2 + return (elemSize == 1) || (elemSize == 2) || (elemSize == 4) || (elemSize == 8); +} + +// -------------------------------------------------- +// isValidVectorIndex - returns true if the specified index is valid for the given SIMD element size +// Arguments: +// elemSize - size in bytes of each SIMD vector element (1, 2, 4, or 8) +// index - the index to validate + +bool emitter::isValidVectorIndex(uint8_t elemSize, uint8_t index) +{ + assert(isValidSimdElemSize(elemSize)); + + bool isValid = false; + switch (elemSize) + { + case 1: + isValid = (index < 16); + break; + case 2: + isValid = (index < 8); + break; + case 4: + isValid = (index < 4); + break; + case 8: + isValid = (index < 2); + break; + default: + unreached(); + } + + return isValid; +} + void emitter::emitIns(instruction ins) { instrDesc* id = emitNewInstrSmall(EA_8BYTE); @@ -422,9 +460,11 @@ void emitter::emitIns_V128Imm(instruction ins, const uint8_t* bytes) // void emitter::emitIns_Lane(instruction ins, emitAttr attr, uint8_t laneIdx) { - instrDesc* id = emitNewInstrSC(attr, laneIdx); - insFormat fmt = emitInsFormat(ins); + instrDesc* id = emitNewInstrSC(attr, laneIdx); + insFormat fmt = emitInsFormat(ins); + uint8_t elemSize = CodeGenInterface::instSimdElemSize(ins); assert(fmt == IF_LANE); + assert(isValidVectorIndex(elemSize, laneIdx)); id->idInsFmt(fmt); id->idIns(ins); @@ -446,8 +486,10 @@ void emitter::emitIns_MemargLane(instruction ins, emitAttr attr, cnsval_ssize_t { instrDescMemargLane* id = 
static_cast(emitAllocAnyInstr(sizeof(instrDescMemargLane), attr)); insFormat fmt = emitInsFormat(ins); + uint8_t elemSize = CodeGenInterface::instSimdElemSize(ins); assert(fmt == IF_MEMARG_LANE); assert(offset >= 0); + assert(isValidVectorIndex(elemSize, laneIdx)); id->idInsFmt(fmt); id->idIns(ins); diff --git a/src/coreclr/jit/emitwasm.h b/src/coreclr/jit/emitwasm.h index b9435a9372740b..dc8b3acbea00d5 100644 --- a/src/coreclr/jit/emitwasm.h +++ b/src/coreclr/jit/emitwasm.h @@ -16,6 +16,7 @@ void emitDispInst(instruction ins); /************************************************************************/ public: +bool isValidVectorIndex(uint8_t elemsize, uint8_t index); void emitIns(instruction ins); void emitIns_BlockTy(instruction ins, WasmValueType valType = WasmValueType::Invalid); void emitIns_I(instruction ins, emitAttr attr, cnsval_ssize_t imm); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 8e7451a9bc1a86..a6c16871591bdc 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -598,6 +598,13 @@ bool CodeGenInterface::instHasPseudoName(instruction ins) } #endif // TARGET_XARCH +#if defined(TARGET_WASM) +uint8_t CodeGenInterface::instSimdElemSize(instruction ins) +{ + return static_cast((instInfo[ins] >> INST_INFO_ELEM_SIZE_SHIFT)); +} +#endif + /***************************************************************************** * * Generate a set instruction. 
diff --git a/src/coreclr/jit/instrswasm.h b/src/coreclr/jit/instrswasm.h index 136e0fac4f99b1..90e87b177e7cba 100644 --- a/src/coreclr/jit/instrswasm.h +++ b/src/coreclr/jit/instrswasm.h @@ -275,21 +275,21 @@ INST2(i64x2_splat, "i64x2.splat", 0, IF_OPCODE, 0xFD, 18) INST2(f32x4_splat, "f32x4.splat", 0, IF_OPCODE, 0xFD, 19) INST2(f64x2_splat, "f64x2.splat", 0, IF_OPCODE, 0xFD, 20) -// Extract/replace lane operations (lane index byte) -INST2(i8x16_extract_lane_s, "i8x16.extract_lane_s", 0, IF_LANE, 0xFD, 21) -INST2(i8x16_extract_lane_u, "i8x16.extract_lane_u", 0, IF_LANE, 0xFD, 22) -INST2(i8x16_replace_lane, "i8x16.replace_lane", 0, IF_LANE, 0xFD, 23) -INST2(i16x8_extract_lane_s, "i16x8.extract_lane_s", 0, IF_LANE, 0xFD, 24) -INST2(i16x8_extract_lane_u, "i16x8.extract_lane_u", 0, IF_LANE, 0xFD, 25) -INST2(i16x8_replace_lane, "i16x8.replace_lane", 0, IF_LANE, 0xFD, 26) -INST2(i32x4_extract_lane, "i32x4.extract_lane", 0, IF_LANE, 0xFD, 27) -INST2(i32x4_replace_lane, "i32x4.replace_lane", 0, IF_LANE, 0xFD, 28) -INST2(i64x2_extract_lane, "i64x2.extract_lane", 0, IF_LANE, 0xFD, 29) -INST2(i64x2_replace_lane, "i64x2.replace_lane", 0, IF_LANE, 0xFD, 30) -INST2(f32x4_extract_lane, "f32x4.extract_lane", 0, IF_LANE, 0xFD, 31) -INST2(f32x4_replace_lane, "f32x4.replace_lane", 0, IF_LANE, 0xFD, 32) -INST2(f64x2_extract_lane, "f64x2.extract_lane", 0, IF_LANE, 0xFD, 33) -INST2(f64x2_replace_lane, "f64x2.replace_lane", 0, IF_LANE, 0xFD, 34) +// Extract/replace lane operations (lane index byte), info byte is (elemSize << 1) +INST2(i8x16_extract_lane_s, "i8x16.extract_lane_s", 2, IF_LANE, 0xFD, 21) +INST2(i8x16_extract_lane_u, "i8x16.extract_lane_u", 2, IF_LANE, 0xFD, 22) +INST2(i8x16_replace_lane, "i8x16.replace_lane", 2, IF_LANE, 0xFD, 23) +INST2(i16x8_extract_lane_s, "i16x8.extract_lane_s", 4, IF_LANE, 0xFD, 24) +INST2(i16x8_extract_lane_u, "i16x8.extract_lane_u", 4, IF_LANE, 0xFD, 25) +INST2(i16x8_replace_lane, "i16x8.replace_lane", 4, IF_LANE, 0xFD, 26) +INST2(i32x4_extract_lane, 
"i32x4.extract_lane", 8, IF_LANE, 0xFD, 27) +INST2(i32x4_replace_lane, "i32x4.replace_lane", 8, IF_LANE, 0xFD, 28) +INST2(i64x2_extract_lane, "i64x2.extract_lane", 16, IF_LANE, 0xFD, 29) +INST2(i64x2_replace_lane, "i64x2.replace_lane", 16, IF_LANE, 0xFD, 30) +INST2(f32x4_extract_lane, "f32x4.extract_lane", 8, IF_LANE, 0xFD, 31) +INST2(f32x4_replace_lane, "f32x4.replace_lane", 8, IF_LANE, 0xFD, 32) +INST2(f64x2_extract_lane, "f64x2.extract_lane", 16, IF_LANE, 0xFD, 33) +INST2(f64x2_replace_lane, "f64x2.replace_lane", 16, IF_LANE, 0xFD, 34) // i8x16 comparisons INST2(i8x16_eq, "i8x16.eq", 0, IF_OPCODE, 0xFD, 35) @@ -360,15 +360,15 @@ INST2(v128_xor, "v128.xor", 0, IF_OPCODE, 0xFD, 81) INST2(v128_bitselect, "v128.bitselect", 0, IF_OPCODE, 0xFD, 82) INST2(v128_any_true, "v128.any_true", 0, IF_OPCODE, 0xFD, 83) -// Load/store lane operations (memarg + lane index) -INST2(v128_load8_lane, "v128.load8_lane", 0, IF_MEMARG_LANE, 0xFD, 84) -INST2(v128_load16_lane, "v128.load16_lane", 0, IF_MEMARG_LANE, 0xFD, 85) -INST2(v128_load32_lane, "v128.load32_lane", 0, IF_MEMARG_LANE, 0xFD, 86) -INST2(v128_load64_lane, "v128.load64_lane", 0, IF_MEMARG_LANE, 0xFD, 87) -INST2(v128_store8_lane, "v128.store8_lane", 0, IF_MEMARG_LANE, 0xFD, 88) -INST2(v128_store16_lane, "v128.store16_lane", 0, IF_MEMARG_LANE, 0xFD, 89) -INST2(v128_store32_lane, "v128.store32_lane", 0, IF_MEMARG_LANE, 0xFD, 90) -INST2(v128_store64_lane, "v128.store64_lane", 0, IF_MEMARG_LANE, 0xFD, 91) +// Load/store lane operations (memarg + lane index), info byte is (elemSize << 1) +INST2(v128_load8_lane, "v128.load8_lane", 2, IF_MEMARG_LANE, 0xFD, 84) +INST2(v128_load16_lane, "v128.load16_lane", 4, IF_MEMARG_LANE, 0xFD, 85) +INST2(v128_load32_lane, "v128.load32_lane", 8, IF_MEMARG_LANE, 0xFD, 86) +INST2(v128_load64_lane, "v128.load64_lane", 16, IF_MEMARG_LANE, 0xFD, 87) +INST2(v128_store8_lane, "v128.store8_lane", 2, IF_MEMARG_LANE, 0xFD, 88) +INST2(v128_store16_lane, "v128.store16_lane", 4, IF_MEMARG_LANE, 0xFD, 89) 
+INST2(v128_store32_lane, "v128.store32_lane", 8, IF_MEMARG_LANE, 0xFD, 90) +INST2(v128_store64_lane, "v128.store64_lane", 16, IF_MEMARG_LANE, 0xFD, 91) // Load zero operations (memarg) INST2(v128_load32_zero, "v128.load32_zero", 0, IF_MEMARG, 0xFD, 92) From 3dbc9a846fc7821da0d8d788757a273ee694fbd6 Mon Sep 17 00:00:00 2001 From: adamperlin Date: Thu, 21 May 2026 14:21:05 -0700 Subject: [PATCH 16/22] Additional review feedback --- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/emitfmtswasm.h | 2 +- src/coreclr/jit/emitwasm.cpp | 38 +++++++++++++++++++++------------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index e857fea8610652..014d5f962e3013 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2435,7 +2435,7 @@ class emitter uint8_t v128Bytes[16]; - void idV128Const(const uint8_t* bytes) + void idV128Const(const uint8_t bytes[16]) { assert(bytes != nullptr); memcpy(v128Bytes, bytes, 16); diff --git a/src/coreclr/jit/emitfmtswasm.h b/src/coreclr/jit/emitfmtswasm.h index c98078cb25d344..0f052e198f2f07 100644 --- a/src/coreclr/jit/emitfmtswasm.h +++ b/src/coreclr/jit/emitfmtswasm.h @@ -46,7 +46,7 @@ IF_DEF(CALL_INDIRECT, IS_NONE, NONE) // IF_DEF(TRY_TABLE, IS_NONE, NONE) // IF_DEF(CATCH_DECL, IS_NONE, NONE) // -IF_DEF(V128, IS_NONE, NONE) // <16 raw bytes> +IF_DEF(V128, IS_NONE, NONE) // <16 raw bytes> IF_DEF(LANE, IS_NONE, NONE) // IF_DEF(MEMARG_LANE, IS_NONE, NONE) // diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index fd824957406e44..8f01c096708d33 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -420,8 +420,22 @@ const uint8_t* emitter::emitGetV128ImmValue(const instrDesc* id) uint8_t emitter::emitGetLaneImmValue(const instrDesc* id) { - assert(id->idIsMemargLaneImm()); - return static_cast(id)->lane; + if (id->idIsMemargLaneImm()) + { + return static_cast(id)->lane; + } + else if (id->idInsFmt() == IF_LANE) + { + 
cnsval_size_t lane = emitGetInsSC(id); + assert(FitsIn<uint8_t>(lane)); + return static_cast<uint8_t>(lane); + } + else + { + noway_assert(!"Unexpected instruction format for lane immediate"); + } + + return 255; } //------------------------------------------------------------------------ @@ -435,7 +449,7 @@ uint8_t emitter::emitGetLaneImmValue(const instrDesc* id) // ins - instruction (currently used with INS_v128_const and INS_i8x16_shuffle) // bytes - pointer to 16 bytes of constant data // -void emitter::emitIns_V128Imm(instruction ins, const uint8_t* bytes) +void emitter::emitIns_V128Imm(instruction ins, const uint8_t bytes[16]) { assert(bytes != nullptr); instrDescV128Imm* id = static_cast<instrDescV128Imm*>(emitAllocAnyInstr(sizeof(instrDescV128Imm), EA_16BYTE)); @@ -1068,9 +1082,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_LANE: { dst += emitOutputOpcode(dst, ins); - cnsval_size_t laneIdx = emitGetInsSC(id); - assert(FitsIn<uint8_t>(laneIdx)); - dst += emitOutputByte(dst, static_cast<uint8_t>(laneIdx)); + uint8_t laneIdx = emitGetLaneImmValue(id); + dst += emitOutputByte(dst, laneIdx); break; } case IF_MEMARG_LANE: @@ -1373,20 +1386,17 @@ void emitter::emitDispIns( case IF_V128: { const uint8_t* imm = emitGetV128ImmValue(id); - printf(" 0x"); - for (int i = 15; i >= 0; i--) + for (int i = 0; i < 16; i++) { - printf("%02x", imm[i]); + printf(" 0x%02x", imm[i]); } } break; case IF_LANE: { - cnsval_size_t lane = emitGetInsSC(id); - assert(FitsIn<uint8_t>(lane)); - - printf(" %u", (uint8_t)lane); + uint8_t lane = emitGetLaneImmValue(id); + printf(" [%u]", (uint8_t)lane); } break; @@ -1398,7 +1408,7 @@ void emitter::emitDispIns( dispLclVarInfoIfAny(); uint8_t lane = emitGetLaneImmValue(id); - printf(" %u", (uint8_t)lane); + printf(" [%u]", (uint8_t)lane); } break; From 35b8b7cb18519b55c3fbb52fc50bf5cbe8dd8782 Mon Sep 17 00:00:00 2001 From: Adam Perlin Date: Thu, 21 May 2026 15:07:04 -0700 Subject: [PATCH 17/22] Potential 
fix for pull request finding Co-authored-by: Copilot Autofix 
powered by AI <175728472+Copilot@users.noreply.github.com> --- src/coreclr/jit/emitwasm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index 8f01c096708d33..688d4a7706c83e 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -17,7 +17,7 @@ }; // clang-format on -bool isValidSimdElemSize(unsigned elemSize) +static bool isValidSimdElemSize(unsigned elemSize) { // Valid SIMD configurations are i8x16, i16x8, i32x4, i64x2, f32x4, f64x2 return (elemSize == 1) || (elemSize == 2) || (elemSize == 4) || (elemSize == 8); From fbaf6a6800b7fa46f1d78b6c459cce489c1e585e Mon Sep 17 00:00:00 2001 From: adamperlin Date: Thu, 21 May 2026 15:21:00 -0700 Subject: [PATCH 18/22] Additional feedback --- src/coreclr/jit/codegeninterface.h | 2 +- src/coreclr/jit/emitwasm.cpp | 4 ++-- src/coreclr/jit/instr.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 2cecb89cd278c4..36df2b5a2de2c9 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -215,7 +215,7 @@ class CodeGenInterface #if defined(TARGET_WASM) // On wasm, we store the simd element size in the upper 7 bits of the instruction info. // The lower bit is reserved as an FP flag. 
-#define INST_INFO_ELEM_SIZE_SHIFT 0x1 + static constexpr unsigned InstInfoElemSizeShift = 1; static uint8_t instSimdElemSize(instruction ins); #endif //------------------------------------------------------------------------- diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index 688d4a7706c83e..3d3bdeeb91437a 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -23,10 +23,10 @@ static bool isValidSimdElemSize(unsigned elemSize) return (elemSize == 1) || (elemSize == 2) || (elemSize == 4) || (elemSize == 8); } -// -------------------------------------------------- +// -------------------------------------------------------------------------------------------------- // isValidVectorIndex - returns true if the specified index is valid for the given SIMD element size // Arguments: -// elemSize - emitAttr describing the size of the SIMD vector elements +// elemSize - element size in bytes (1, 2, 4, or 8) // index - the index to validate bool emitter::isValidVectorIndex(uint8_t elemSize, uint8_t index) diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index a6c16871591bdc..34cca6e7f4e8ac 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -601,7 +601,7 @@ bool CodeGenInterface::instHasPseudoName(instruction ins) #if defined(TARGET_WASM) uint8_t CodeGenInterface::instSimdElemSize(instruction ins) { - return static_cast((instInfo[ins] >> INST_INFO_ELEM_SIZE_SHIFT)); + return static_cast((instInfo[ins] >> InstInfoElemSizeShift)); } #endif From 4b6201eef023b81cf121471642fe68811e1a05f2 Mon Sep 17 00:00:00 2001 From: adamperlin Date: Thu, 21 May 2026 15:29:27 -0700 Subject: [PATCH 19/22] More feedback --- src/coreclr/jit/codegeninterface.h | 6 +++--- src/coreclr/jit/codegenwasm.cpp | 1 - src/coreclr/jit/emitwasm.cpp | 4 ++-- src/coreclr/jit/emitwasm.h | 2 +- src/coreclr/jit/instr.cpp | 1 + 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegeninterface.h 
b/src/coreclr/jit/codegeninterface.h index 36df2b5a2de2c9..e696c886cd9e28 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -213,10 +213,10 @@ class CodeGenInterface bool IsEmbeddedBroadcastEnabled(instruction ins, GenTree* op); #endif // TARGET_XARCH #if defined(TARGET_WASM) -// On wasm, we store the simd element size in the upper 7 bits of the instruction info. -// The lower bit is reserved as an FP flag. + // On wasm, we store the simd element size in the upper 7 bits of the instruction info. + // The lower bit is reserved as an FP flag. static constexpr unsigned InstInfoElemSizeShift = 1; - static uint8_t instSimdElemSize(instruction ins); + static uint8_t instSimdElemSize(instruction ins); #endif //------------------------------------------------------------------------- // Liveness-related fields & methods diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index d0f6835a368e16..6f9ddcf8a1fd65 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -3665,7 +3665,6 @@ void CodeGen::genWasmEmitterUnitTestsSimd() #undef DROP #undef TEST_UNARY_V128 #undef TEST_BINARY_V128 -#undef TEST_V128_TO_I32 #undef TEST_EXTRACT_LANE #undef TEST_REPLACE_LANE_I32 #undef TEST_REPLACE_LANE_I64 diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index 3d3bdeeb91437a..8a77869ab899b2 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -432,10 +432,10 @@ uint8_t emitter::emitGetLaneImmValue(const instrDesc* id) } else { - noway_assert(!"Unexpected instruction format for lane immediate"); + unreached(); } - return 255; + return 0; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/emitwasm.h b/src/coreclr/jit/emitwasm.h index dc8b3acbea00d5..b5bbff12e7cc8d 100644 --- a/src/coreclr/jit/emitwasm.h +++ b/src/coreclr/jit/emitwasm.h @@ -33,7 +33,7 @@ void emitIns_R_R(instruction ins, 
emitAttr attr, regNumber reg1, regNumber reg2) void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); // Packed SIMD instruction emit functions -void emitIns_V128Imm(instruction ins, const uint8_t* bytes); +void emitIns_V128Imm(instruction ins, const uint8_t bytes[16]); void emitIns_Lane(instruction ins, emitAttr attr, uint8_t laneIdx); void emitIns_MemargLane(instruction ins, emitAttr attr, cnsval_ssize_t offset, uint8_t laneIdx); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 34cca6e7f4e8ac..500e94832339cc 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -601,6 +601,7 @@ bool CodeGenInterface::instHasPseudoName(instruction ins) #if defined(TARGET_WASM) uint8_t CodeGenInterface::instSimdElemSize(instruction ins) { + assert((unsigned)ins < ArrLen(instInfo)); return static_cast<uint8_t>((instInfo[ins] >> InstInfoElemSizeShift)); } #endif From 6bdc88178e2e1672b767abad8fa04e2c636e8921 Mon Sep 17 00:00:00 2001 From: adamperlin Date: Thu, 21 May 2026 15:53:37 -0700 Subject: [PATCH 20/22] Change genDefineTempLabel for Wasm emitter unit tests --- src/coreclr/jit/codegenlinear.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index c09da7df8ad4a8..00b5ccf2740ac2 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2731,12 +2731,10 @@ void CodeGen::genEmitterUnitTests() #ifndef TARGET_WASM inst_JMP(EJ_jmp, skipLabel); #else - // On Wasm, we need a pre-defined label to emit a branch, so we use skipLabel for this. - // We skip over the generated emitter test code by nesting it in a block where the - // first instruction is a "skip" branch to the end of the block. - genDefineTempLabel(skipLabel); + // On Wasm, we skip over the generated emitter test code by nesting it in a block where the + // first instruction branches to the end of the block. 
GetEmitter()->emitIns_BlockTy(INS_block); - GetEmitter()->emitIns_J(INS_br, EA_4BYTE, 0, skipLabel); + GetEmitter()->emitIns_J(INS_br, EA_4BYTE, 0, nullptr); #endif // Add NOPs at the start and end for easier script parsing. @@ -2796,9 +2794,7 @@ void CodeGen::genEmitterUnitTests() instGen(INS_end); #endif -#ifndef TARGET_WASM genDefineTempLabel(skipLabel); -#endif instGen(INS_nop); instGen(INS_nop); instGen(INS_nop); From 6b6fdf135be271a8d6c16622adcac2e46641f5a8 Mon Sep 17 00:00:00 2001 From: adamperlin Date: Thu, 21 May 2026 17:29:22 -0700 Subject: [PATCH 21/22] Remove static --- src/coreclr/jit/emitwasm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index 8a77869ab899b2..c39f02e7ba470c 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -17,7 +17,7 @@ }; // clang-format on -static bool isValidSimdElemSize(unsigned elemSize) +bool isValidSimdElemSize(unsigned elemSize) { // Valid SIMD configurations are i8x16, i16x8, i32x4, i64x2, f32x4, f64x2 return (elemSize == 1) || (elemSize == 2) || (elemSize == 4) || (elemSize == 8); From 017a70f1f791a2266c90d9fed8ca9792bb8e548e Mon Sep 17 00:00:00 2001 From: adamperlin Date: Fri, 22 May 2026 09:17:59 -0700 Subject: [PATCH 22/22] Additional feedback --- src/coreclr/jit/emitwasm.cpp | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp index c39f02e7ba470c..46e21686deb1a9 100644 --- a/src/coreclr/jit/emitwasm.cpp +++ b/src/coreclr/jit/emitwasm.cpp @@ -559,11 +559,6 @@ static bool HasOpcodePrefix(instruction ins) return GetOpcodePrefix(ins) != 0; } -inline static bool IsWasmSimdInstruction(instruction ins) -{ - return GetOpcodePrefix(ins) == 0xFD; -} - size_t emitter::emitSizeOfInsDsc(instrDesc* id) const { if (emitIsSmallInsDsc(id)) @@ -571,19 +566,14 @@ size_t emitter::emitSizeOfInsDsc(instrDesc* id) const return 
SMALL_IDSC_SIZE; } - if (IsWasmSimdInstruction(id->idIns())) + if (id->idIsMemargLaneImm()) { - // Some (not all) SIMD instructions have larger instrDescs, - // and these cases are handled here. - switch (id->idInsFmt()) - { - case IF_V128: - return sizeof(instrDescV128Imm); - case IF_MEMARG_LANE: - return sizeof(instrDescMemargLane); - default: // all other SIMD instructions can fit in a standard instrDesc - break; - } + return sizeof(instrDescMemargLane); + } + + if (id->idIsV128Imm()) + { + return sizeof(instrDescV128Imm); } if (id->idIsLargeCns()) @@ -746,6 +736,7 @@ unsigned emitter::instrDesc::idCodeSize() const case IF_MEMARG_LANE: { uint64_t align = emitGetAlignHintLog2(this); + assert(align < 64); // spec says align > 2^6 produces a memidx for multiple memories. size += SizeOfULEB128(align); size += idIsCnsReloc() ? PADDED_RELOC_SIZE : SizeOfULEB128(emitGetInsSC(this)); size += 1; // 1 byte lane index