Skip to content

Commit 2171541

Browse files
committed
Adding more SIMD constant folding support
1 parent 2410737 commit 2171541

3 files changed

Lines changed: 1117 additions & 94 deletions

File tree

src/coreclr/jit/simd.h

Lines changed: 170 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,39 @@ struct simd32_t
149149
}
150150
};
151151

152+
template <typename TBase>
153+
TBase EvaluateUnaryScalarSpecialized(genTreeOps oper, TBase arg0)
154+
{
155+
switch (oper)
156+
{
157+
case GT_NOT:
158+
{
159+
return ~arg0;
160+
}
161+
162+
default:
163+
{
164+
unreached();
165+
}
166+
}
167+
}
168+
169+
template <>
170+
inline float EvaluateUnaryScalarSpecialized<float>(genTreeOps oper, float arg0)
171+
{
172+
uint32_t arg0Bits = *reinterpret_cast<uint32_t*>(&arg0);
173+
uint32_t resultBits = EvaluateUnaryScalarSpecialized<uint32_t>(oper, arg0Bits);
174+
return *reinterpret_cast<float*>(&resultBits);
175+
}
176+
177+
template <>
178+
inline double EvaluateUnaryScalarSpecialized<double>(genTreeOps oper, double arg0)
179+
{
180+
uint64_t arg0Bits = *reinterpret_cast<uint64_t*>(&arg0);
181+
uint64_t resultBits = EvaluateUnaryScalarSpecialized<uint64_t>(oper, arg0Bits);
182+
return *reinterpret_cast<double*>(&resultBits);
183+
}
184+
152185
template <typename TBase>
153186
TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
154187
{
@@ -161,7 +194,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
161194

162195
default:
163196
{
164-
unreached();
197+
return EvaluateUnaryScalarSpecialized<TBase>(oper, arg0);
165198
}
166199
}
167200
}
@@ -268,6 +301,119 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
268301
}
269302
}
270303

304+
template <typename TBase>
TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1)
{
    // Logical (zero-filling) right shift. The shift count wraps at the bit
    // width of TBase, so the shift expression is never undefined behavior.
    const TBase shiftMask   = static_cast<TBase>((sizeof(TBase) * 8) - 1);
    const TBase shiftAmount = static_cast<TBase>(arg1 & shiftMask);

    return static_cast<TBase>(arg0 >> shiftAmount);
}
309+
310+
template <>
311+
inline int8_t EvaluateBinaryScalarRSZ<int8_t>(int8_t arg0, int8_t arg1)
312+
{
313+
uint8_t arg0Bits = static_cast<uint8_t>(arg0);
314+
uint8_t arg1Bits = static_cast<uint8_t>(arg1);
315+
316+
uint8_t resultBits = EvaluateBinaryScalarRSZ<uint8_t>(arg0Bits, arg1Bits);
317+
return static_cast<int8_t>(resultBits);
318+
}
319+
320+
template <>
321+
inline int16_t EvaluateBinaryScalarRSZ<int16_t>(int16_t arg0, int16_t arg1)
322+
{
323+
uint16_t arg0Bits = static_cast<uint16_t>(arg0);
324+
uint16_t arg1Bits = static_cast<uint16_t>(arg1);
325+
326+
uint16_t resultBits = EvaluateBinaryScalarRSZ<uint16_t>(arg0Bits, arg1Bits);
327+
return static_cast<int16_t>(resultBits);
328+
}
329+
330+
template <>
331+
inline int32_t EvaluateBinaryScalarRSZ<int32_t>(int32_t arg0, int32_t arg1)
332+
{
333+
uint32_t arg0Bits = static_cast<uint32_t>(arg0);
334+
uint32_t arg1Bits = static_cast<uint32_t>(arg1);
335+
336+
uint32_t resultBits = EvaluateBinaryScalarRSZ<uint32_t>(arg0Bits, arg1Bits);
337+
return static_cast<int32_t>(resultBits);
338+
}
339+
340+
template <>
341+
inline int64_t EvaluateBinaryScalarRSZ<int64_t>(int64_t arg0, int64_t arg1)
342+
{
343+
uint64_t arg0Bits = static_cast<uint64_t>(arg0);
344+
uint64_t arg1Bits = static_cast<uint64_t>(arg1);
345+
346+
uint64_t resultBits = EvaluateBinaryScalarRSZ<uint64_t>(arg0Bits, arg1Bits);
347+
return static_cast<int64_t>(resultBits);
348+
}
349+
350+
template <typename TBase>
351+
TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1)
352+
{
353+
switch (oper)
354+
{
355+
case GT_AND:
356+
{
357+
return arg0 & arg1;
358+
}
359+
360+
case GT_AND_NOT:
361+
{
362+
return arg0 & ~arg1;
363+
}
364+
365+
case GT_LSH:
366+
{
367+
return arg0 << (arg1 & ((sizeof(TBase) * 8) - 1));
368+
}
369+
370+
case GT_OR:
371+
{
372+
return arg0 | arg1;
373+
}
374+
375+
case GT_RSH:
376+
{
377+
return arg0 >> (arg1 & ((sizeof(TBase) * 8) - 1));
378+
}
379+
380+
case GT_RSZ:
381+
{
382+
return EvaluateBinaryScalarRSZ<TBase>(arg0, arg1);
383+
}
384+
385+
case GT_XOR:
386+
{
387+
return arg0 ^ arg1;
388+
}
389+
390+
default:
391+
{
392+
unreached();
393+
}
394+
}
395+
}
396+
397+
template <>
398+
inline float EvaluateBinaryScalarSpecialized<float>(genTreeOps oper, float arg0, float arg1)
399+
{
400+
uint32_t arg0Bits = *reinterpret_cast<uint32_t*>(&arg0);
401+
uint32_t arg1Bits = *reinterpret_cast<uint32_t*>(&arg1);
402+
403+
uint32_t resultBits = EvaluateBinaryScalarSpecialized<uint32_t>(oper, arg0Bits, arg1Bits);
404+
return *reinterpret_cast<float*>(&resultBits);
405+
}
406+
407+
template <>
408+
inline double EvaluateBinaryScalarSpecialized<double>(genTreeOps oper, double arg0, double arg1)
409+
{
410+
uint64_t arg0Bits = *reinterpret_cast<uint64_t*>(&arg0);
411+
uint64_t arg1Bits = *reinterpret_cast<uint64_t*>(&arg1);
412+
413+
uint64_t resultBits = EvaluateBinaryScalarSpecialized<uint64_t>(oper, arg0Bits, arg1Bits);
414+
return *reinterpret_cast<double*>(&resultBits);
415+
}
416+
271417
template <typename TBase>
272418
TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
273419
{
@@ -278,14 +424,24 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
278424
return arg0 + arg1;
279425
}
280426

427+
case GT_DIV:
428+
{
429+
return arg0 / arg1;
430+
}
431+
432+
case GT_MUL:
433+
{
434+
return arg0 * arg1;
435+
}
436+
281437
case GT_SUB:
282438
{
283439
return arg0 - arg1;
284440
}
285441

286442
default:
287443
{
288-
unreached();
444+
return EvaluateBinaryScalarSpecialized<TBase>(oper, arg0, arg1);
289445
}
290446
}
291447
}
@@ -395,6 +551,18 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
395551
}
396552
}
397553

554+
template <typename TSimd, typename TBase>
void BroadcastConstantToSimd(TSimd* result, TBase arg0)
{
    // Splat arg0 across every TBase-sized lane of *result.
    const uint32_t laneCount = sizeof(TSimd) / sizeof(TBase);

    for (uint32_t lane = 0; lane < laneCount; lane++)
    {
        // memcpy keeps the per-lane store well-defined for any TBase;
        // this is a safe spelling of `result[lane] = arg0`.
        memcpy(&result->u8[lane * sizeof(TBase)], &arg0, sizeof(TBase));
    }
}
565+
398566
#ifdef FEATURE_SIMD
399567

400568
#ifdef TARGET_XARCH

0 commit comments

Comments
 (0)