@@ -149,6 +149,39 @@ struct simd32_t
149149 }
150150};
151151
152+ template <typename TBase>
153+ TBase EvaluateUnaryScalarSpecialized (genTreeOps oper, TBase arg0)
154+ {
155+ switch (oper)
156+ {
157+ case GT_NOT:
158+ {
159+ return ~arg0;
160+ }
161+
162+ default :
163+ {
164+ unreached ();
165+ }
166+ }
167+ }
168+
169+ template <>
170+ inline float EvaluateUnaryScalarSpecialized<float >(genTreeOps oper, float arg0)
171+ {
172+ uint32_t arg0Bits = *reinterpret_cast <uint32_t *>(&arg0);
173+ uint32_t resultBits = EvaluateUnaryScalarSpecialized<uint32_t >(oper, arg0Bits);
174+ return *reinterpret_cast <float *>(&resultBits);
175+ }
176+
177+ template <>
178+ inline double EvaluateUnaryScalarSpecialized<double >(genTreeOps oper, double arg0)
179+ {
180+ uint64_t arg0Bits = *reinterpret_cast <uint64_t *>(&arg0);
181+ uint64_t resultBits = EvaluateUnaryScalarSpecialized<uint64_t >(oper, arg0Bits);
182+ return *reinterpret_cast <double *>(&resultBits);
183+ }
184+
152185template <typename TBase>
153186TBase EvaluateUnaryScalar (genTreeOps oper, TBase arg0)
154187{
@@ -161,7 +194,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
161194
162195 default :
163196 {
164- unreached ( );
197+ return EvaluateUnaryScalarSpecialized<TBase>(oper, arg0 );
165198 }
166199 }
167200}
@@ -268,6 +301,119 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
268301 }
269302}
270303
// Shifts arg0 right by arg1 using the built-in >> operator of TBase.
//
// Arguments:
//    arg0 - the value to shift
//    arg1 - the shift count; only the bits that fit below the bit width of
//           TBase are used (the count is masked with `bitsof(TBase) - 1`)
//
// Return Value:
//    The shifted value converted back to TBase.
template <typename TBase>
TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1)
{
    const size_t shiftCountMask = (sizeof(TBase) * 8) - 1;
    const size_t shiftCount     = arg1 & shiftCountMask;

    return arg0 >> shiftCount;
}
309+
310+ template <>
311+ inline int8_t EvaluateBinaryScalarRSZ<int8_t >(int8_t arg0, int8_t arg1)
312+ {
313+ uint8_t arg0Bits = static_cast <uint8_t >(arg0);
314+ uint8_t arg1Bits = static_cast <uint8_t >(arg1);
315+
316+ uint8_t resultBits = EvaluateBinaryScalarRSZ<uint8_t >(arg0Bits, arg1Bits);
317+ return static_cast <int8_t >(resultBits);
318+ }
319+
320+ template <>
321+ inline int16_t EvaluateBinaryScalarRSZ<int16_t >(int16_t arg0, int16_t arg1)
322+ {
323+ uint16_t arg0Bits = static_cast <uint16_t >(arg0);
324+ uint16_t arg1Bits = static_cast <uint16_t >(arg1);
325+
326+ uint16_t resultBits = EvaluateBinaryScalarRSZ<uint16_t >(arg0Bits, arg1Bits);
327+ return static_cast <int16_t >(resultBits);
328+ }
329+
330+ template <>
331+ inline int32_t EvaluateBinaryScalarRSZ<int32_t >(int32_t arg0, int32_t arg1)
332+ {
333+ uint32_t arg0Bits = static_cast <uint32_t >(arg0);
334+ uint32_t arg1Bits = static_cast <uint32_t >(arg1);
335+
336+ uint32_t resultBits = EvaluateBinaryScalarRSZ<uint32_t >(arg0Bits, arg1Bits);
337+ return static_cast <int32_t >(resultBits);
338+ }
339+
340+ template <>
341+ inline int64_t EvaluateBinaryScalarRSZ<int64_t >(int64_t arg0, int64_t arg1)
342+ {
343+ uint64_t arg0Bits = static_cast <uint64_t >(arg0);
344+ uint64_t arg1Bits = static_cast <uint64_t >(arg1);
345+
346+ uint64_t resultBits = EvaluateBinaryScalarRSZ<uint64_t >(arg0Bits, arg1Bits);
347+ return static_cast <int64_t >(resultBits);
348+ }
349+
350+ template <typename TBase>
351+ TBase EvaluateBinaryScalarSpecialized (genTreeOps oper, TBase arg0, TBase arg1)
352+ {
353+ switch (oper)
354+ {
355+ case GT_AND:
356+ {
357+ return arg0 & arg1;
358+ }
359+
360+ case GT_AND_NOT:
361+ {
362+ return arg0 & ~arg1;
363+ }
364+
365+ case GT_LSH:
366+ {
367+ return arg0 << (arg1 & ((sizeof (TBase) * 8 ) - 1 ));
368+ }
369+
370+ case GT_OR:
371+ {
372+ return arg0 | arg1;
373+ }
374+
375+ case GT_RSH:
376+ {
377+ return arg0 >> (arg1 & ((sizeof (TBase) * 8 ) - 1 ));
378+ }
379+
380+ case GT_RSZ:
381+ {
382+ return EvaluateBinaryScalarRSZ<TBase>(arg0, arg1);
383+ }
384+
385+ case GT_XOR:
386+ {
387+ return arg0 ^ arg1;
388+ }
389+
390+ default :
391+ {
392+ unreached ();
393+ }
394+ }
395+ }
396+
397+ template <>
398+ inline float EvaluateBinaryScalarSpecialized<float >(genTreeOps oper, float arg0, float arg1)
399+ {
400+ uint32_t arg0Bits = *reinterpret_cast <uint32_t *>(&arg0);
401+ uint32_t arg1Bits = *reinterpret_cast <uint32_t *>(&arg1);
402+
403+ uint32_t resultBits = EvaluateBinaryScalarSpecialized<uint32_t >(oper, arg0Bits, arg1Bits);
404+ return *reinterpret_cast <float *>(&resultBits);
405+ }
406+
407+ template <>
408+ inline double EvaluateBinaryScalarSpecialized<double >(genTreeOps oper, double arg0, double arg1)
409+ {
410+ uint64_t arg0Bits = *reinterpret_cast <uint64_t *>(&arg0);
411+ uint64_t arg1Bits = *reinterpret_cast <uint64_t *>(&arg1);
412+
413+ uint64_t resultBits = EvaluateBinaryScalarSpecialized<uint64_t >(oper, arg0Bits, arg1Bits);
414+ return *reinterpret_cast <double *>(&resultBits);
415+ }
416+
271417template <typename TBase>
272418TBase EvaluateBinaryScalar (genTreeOps oper, TBase arg0, TBase arg1)
273419{
@@ -278,14 +424,24 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
278424 return arg0 + arg1;
279425 }
280426
427+ case GT_DIV:
428+ {
429+ return arg0 / arg1;
430+ }
431+
432+ case GT_MUL:
433+ {
434+ return arg0 * arg1;
435+ }
436+
281437 case GT_SUB:
282438 {
283439 return arg0 - arg1;
284440 }
285441
286442 default :
287443 {
288- unreached ( );
444+ return EvaluateBinaryScalarSpecialized<TBase>(oper, arg0, arg1 );
289445 }
290446 }
291447}
@@ -395,6 +551,18 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
395551 }
396552}
397553
// Fills a SIMD constant with repeated copies of a single scalar.
//
// Arguments:
//    result - the SIMD value to fill; it is written through its `u8` byte array
//    arg0   - the scalar copied into every TBase-sized element of *result
//
// Notes:
//    `sizeof(TSimd) / sizeof(TBase)` elements are written, back to back,
//    starting at byte offset 0.
template <typename TSimd, typename TBase>
void BroadcastConstantToSimd(TSimd* result, TBase arg0)
{
    const size_t elementSize = sizeof(TBase);

    // Walk the destination by byte offset, one whole element at a time.
    for (size_t byteOffset = 0; (byteOffset + elementSize) <= sizeof(TSimd); byteOffset += elementSize)
    {
        // memcpy stores the element without reinterpreting the byte array as TBase.
        memcpy(&result->u8[byteOffset], &arg0, elementSize);
    }
}
565+
398566#ifdef FEATURE_SIMD
399567
400568#ifdef TARGET_XARCH
0 commit comments