Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 104 additions & 74 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31560,90 +31560,101 @@ bool GenTree::IsInvariant() const

//-------------------------------------------------------------------
// IsVectorPerElementMask: returns true if this node is a vector constant per-element mask
// (every element has either all bits set or none of them).
// (every element has either all bits set or none of them) for the
// given simd size and base type.
//
// Arguments:
// simdBaseType - the base type of the constant being checked.
// simdSize - the size of the SIMD type of the intrinsic.
//
// Returns:
// True if this node is a vector constant per-element mask.
// True if this node is a per-element mask compatible with simdBaseType and simdSize
//
bool GenTree::IsVectorPerElementMask(var_types simdBaseType, unsigned simdSize) const
{
#ifdef FEATURE_SIMD
// This should be kept in sync with ValueNumStore::IsVectorPerElementMask

var_types simdType = TypeGet();
unsigned elementCount = GenTreeVecCon::ElementCount(simdSize, simdBaseType);

assert(varTypeIsSIMD(simdType));
assert(genTypeSize(simdType) == simdSize);

if (IsCnsVec())
{
const GenTreeVecCon* vecCon = AsVecCon();
return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal, simdBaseType, elementCount);
}

int elementCount = vecCon->ElementCount(simdSize, simdBaseType);
if (!OperIsHWIntrinsic())
{
return false;
}

switch (simdBaseType)
{
case TYP_BYTE:
case TYP_UBYTE:
return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u8[0], elementCount);
case TYP_SHORT:
case TYP_USHORT:
return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u16[0], elementCount);
case TYP_INT:
case TYP_UINT:
case TYP_FLOAT:
return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u32[0], elementCount);
case TYP_LONG:
case TYP_ULONG:
case TYP_DOUBLE:
return ElementsAreAllBitsSetOrZero(&vecCon->gtSimdVal.u64[0], elementCount);
default:
unreached();
}
const GenTreeHWIntrinsic* intrinsic = AsHWIntrinsic();

NamedIntrinsic intrinsicId = intrinsic->GetHWIntrinsicId();
unsigned intrinsicSimdSize = intrinsic->GetSimdSize();
var_types intrinsicSimdBaseType = intrinsic->GetSimdBaseType();

Comment thread
tannergooding marked this conversation as resolved.
if (intrinsicSimdSize != simdSize)
{
return false;
}
else if (OperIsHWIntrinsic())

if (HWIntrinsicInfo::ReturnsPerElementMask(intrinsicId))
{
const GenTreeHWIntrinsic* intrinsic = AsHWIntrinsic();
const NamedIntrinsic intrinsicId = intrinsic->GetHWIntrinsicId();
// When producing a SIMD result, we need it to
// have a base type that is the same size as, or
// larger than, what we expect.
//
// Consider for example us expecting `byte` and the
// intrinsic here produces `ushort`. In that case we
// expect every byte to be either `0x00` or `0xFF`
// and the intrinsic produces either `0x0000` or `0xFFFF`
// and so it meets this need.
//
// However, the inverse is not safe as we would expect
// `0x0000` or `0xFFFF`, but the intrinsic could produce
// `0x00FF` or `0xFF00` which fails the expectation.

if (HWIntrinsicInfo::ReturnsPerElementMask(intrinsicId))
{
// We directly return a per-element mask
return true;
}
return genTypeSize(intrinsicSimdBaseType) >= genTypeSize(simdBaseType);
}

bool isScalar = false;
genTreeOps oper = intrinsic->GetOperForHWIntrinsicId(&isScalar);
bool isScalar = false;
genTreeOps oper = GenTreeHWIntrinsic::GetOperForHWIntrinsicId(intrinsicId, simdBaseType, &isScalar);

switch (oper)
switch (oper)
{
case GT_AND:
case GT_AND_NOT:
case GT_OR:
case GT_OR_NOT:
case GT_XOR:
case GT_XOR_NOT:
{
case GT_AND:
case GT_AND_NOT:
case GT_OR:
case GT_OR_NOT:
case GT_XOR:
case GT_XOR_NOT:
{
// We are a binary bitwise operation where both inputs are per-element masks
return intrinsic->Op(1)->IsVectorPerElementMask(simdBaseType, simdSize) &&
intrinsic->Op(2)->IsVectorPerElementMask(simdBaseType, simdSize);
}
// We are a binary bitwise operation where both inputs are per-element masks
//
// While some cases like OR could combine in ways that produce a usable mask
// there isn't any way to statically determine this for non-constants and
// the constant cases should've already been folded.

case GT_NOT:
{
// We are a unary bitwise operation where the input is a per-element mask
return intrinsic->Op(1)->IsVectorPerElementMask(simdBaseType, simdSize);
}
return intrinsic->Op(1)->IsVectorPerElementMask(simdBaseType, simdSize) &&
intrinsic->Op(2)->IsVectorPerElementMask(simdBaseType, simdSize);
}

default:
{
assert(!GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper));
break;
}
case GT_NOT:
{
// We are a unary bitwise operation where the input is a per-element mask
return intrinsic->Op(1)->IsVectorPerElementMask(simdBaseType, simdSize);
}

return false;
}
else if (IsCnsMsk())
{
return true;
default:
{
assert(!GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper));
break;
}
}
#endif // FEATURE_SIMD

Expand Down Expand Up @@ -33002,24 +33013,24 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
break;
}
}
else if (otherNode->OperIsHWIntrinsic())
else if (otherNode->IsVectorPerElementMask(simdBaseType, simdSize))
{
GenTreeHWIntrinsic* otherIntrinsic = otherNode->AsHWIntrinsic();
NamedIntrinsic otherIntrinsicId = otherIntrinsic->GetHWIntrinsicId();

if (HWIntrinsicInfo::ReturnsPerElementMask(otherIntrinsicId) &&
(genTypeSize(simdBaseType) == genTypeSize(otherIntrinsic->GetSimdBaseType())))
// Handle `Equals(PerElementMask, AllBitsSet)` and `Equals(AllBitsSet, PerElementMask)` for
// integrals
if (cnsNode->IsVectorAllBitsSet())
{
// This optimization is only safe if we know the other node produces
// AllBitsSet or Zero per element and if the outer comparison is the
// same size as what the other node produces for its mask
// We are comparing something that is known per element to be either
// AllBitsSet or Zero, with AllBitsSet.
//
// In such a case:
// * `AllBitsSet == AllBitsSet` is true and so produces `AllBitsSet`
// * `AllBitsSet == Zero` is false and so produces `Zero`
//
// This means that we are not changing anything and can just return
// the per element mask

// Handle `(Mask == AllBitsSet) == Mask` and `(AllBitsSet == Mask) == Mask` for integrals
if (cnsNode->IsVectorAllBitsSet())
{
resultNode = otherNode;
break;
}
resultNode = otherNode;
break;
}
}
break;
Expand Down Expand Up @@ -33190,6 +33201,25 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
break;
}
}
else if (otherNode->IsVectorPerElementMask(simdBaseType, simdSize))
{
// Handle `~Equals(PerElementMask, Zero)` and `~Equals(Zero, PerElementMask)` for integrals
if (cnsNode->IsVectorZero())
{
// We are comparing something that is known per element to be either
// AllBitsSet or Zero, with Zero.
//
// In such a case:
// * `AllBitsSet != Zero` is true and so produces `AllBitsSet`
// * `Zero != Zero` is false and so produces `Zero`
//
// This means that we are not changing anything and can just return
// the per element mask

resultNode = otherNode;
break;
}
}
else if (otherNode->OperIsHWIntrinsic())
{
GenTreeHWIntrinsic* otherIntrinsic = otherNode->AsHWIntrinsic();
Expand Down
Loading
Loading