-
Notifications
You must be signed in to change notification settings - Fork 5.4k
[RyuJit/WASM] Insert temporaries when needed in the stackifier #125146
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX | |
|
|
||
| #include "lower.h" | ||
|
|
||
| static void SetMultiplyUsed(GenTree* node) | ||
| { | ||
| assert(varTypeIsEnregisterable(node)); | ||
| assert(!node->isContained()); | ||
| node->gtLIRFlags |= LIR::Flags::MultiplyUsed; | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------ | ||
| // IsCallTargetInRange: Can a call target address be encoded in-place? | ||
| // | ||
|
|
@@ -453,7 +460,7 @@ void Lowering::ContainCheckSelect(GenTreeOp* node) | |
| } | ||
|
|
||
| //------------------------------------------------------------------------ | ||
| // AfterLowerBlock: stackify the nodes in this block. | ||
| // AfterLowerBlocks: stackify the nodes in all blocks. | ||
| // | ||
| // Stackification involves moving nodes around and inserting temporaries | ||
| // as necessary. We expect the vast majority of IR to already be in correct | ||
|
|
@@ -463,61 +470,69 @@ void Lowering::ContainCheckSelect(GenTreeOp* node) | |
| // the introduced temporaries can get enregistered and the last-use info | ||
| // on LCL_VAR nodes in RA is readily correct. | ||
| // | ||
| void Lowering::AfterLowerBlock() | ||
| void Lowering::AfterLowerBlocks() | ||
| { | ||
| struct Temporary | ||
| { | ||
| unsigned LclNum; | ||
| Temporary* Prev = nullptr; | ||
| }; | ||
|
|
||
| class Stackifier | ||
| { | ||
| Lowering* m_lower; | ||
| bool m_anyChanges = false; | ||
| Lowering* m_lower; | ||
| Compiler* m_compiler; | ||
| ArrayStack<GenTree**> m_stack; | ||
| unsigned m_minimumTempLclNum; | ||
| Temporary* m_availableTemps[static_cast<unsigned>(WasmValueType::Count)] = {}; | ||
| Temporary* m_unusedTemps[static_cast<unsigned>(WasmValueType::Count)] = {}; | ||
| bool m_anyChanges = false; | ||
|
|
||
| public: | ||
| Stackifier(Lowering* lower) | ||
| : m_lower(lower) | ||
| , m_compiler(lower->m_compiler) | ||
| , m_stack(m_compiler->getAllocator(CMK_Lower)) | ||
| , m_minimumTempLclNum(m_compiler->lvaCount) | ||
| { | ||
| } | ||
|
|
||
| void StackifyCurrentBlock() | ||
| void StackifyBlock(BasicBlock* block) | ||
| { | ||
| GenTree* node = m_lower->BlockRange().LastNode(); | ||
| m_anyChanges = false; | ||
| m_lower->m_block = block; | ||
| GenTree* node = block->lastNode(); | ||
| while (node != nullptr) | ||
| { | ||
| assert(IsDataFlowRoot(node)); | ||
| node = StackifyTree(node); | ||
| } | ||
| m_lower->m_block = nullptr; | ||
|
|
||
| if (!m_anyChanges) | ||
| { | ||
| JITDUMP(FMT_BB ": already in WASM value stack order\n", m_lower->m_block->bbNum); | ||
| } | ||
| } | ||
|
|
||
| bool CanMoveNodePast(GenTree* node, GenTree* past) | ||
| { | ||
| bool result = node->IsInvariant() || node->isContained() || | ||
| (node->OperIs(GT_LCL_VAR) && | ||
| !m_lower->m_compiler->lvaGetDesc(node->AsLclVarCommon())->IsAddressExposed()); | ||
| JITDUMP(FMT_BB ": %s\n", block->bbNum, | ||
| m_anyChanges ? "stackified with some changes" : "already in WASM value stack order"); | ||
|
|
||
| if (result) | ||
| for (WasmValueType type = WasmValueType::First; type < WasmValueType::Count; ++type) | ||
| { | ||
| assert(m_lower->IsInvariantInRange(node, past)); | ||
| assert((m_unusedTemps[static_cast<unsigned>(type)] == nullptr) && "Some temporaries were not released"); | ||
| } | ||
|
|
||
| return result; | ||
| } | ||
|
|
||
| GenTree* StackifyTree(GenTree* root) | ||
| { | ||
| ArrayStack<GenTree*>* stack = &m_lower->m_stackificationStack; | ||
| int initialDepth = stack->Height(); | ||
| int initialDepth = m_stack.Height(); | ||
|
|
||
| // Simple greedy algorithm working backwards. The invariant is that the stack top must be placed right next | ||
| // to (in normal linear order - before) the node we last stackified. | ||
| stack->Push(root); | ||
| GenTree* current = root->gtNext; | ||
| while (stack->Height() != initialDepth) | ||
| m_stack.Push(&root); | ||
| ReleaseTemporariesDefinedBy(root); | ||
|
|
||
| GenTree* lastStackified = root->gtNext; | ||
| while (m_stack.Height() != initialDepth) | ||
| { | ||
| GenTree* node = stack->Pop(); | ||
| GenTree* prev = (current != nullptr) ? current->gtPrev : root; | ||
| GenTree** use = m_stack.Pop(); | ||
| GenTree* node = *use; | ||
| GenTree* prev = (lastStackified != nullptr) ? lastStackified->gtPrev : root; | ||
| while (node != prev) | ||
| { | ||
| // Maybe this is an intervening void-equivalent node that we can also just stackify. | ||
|
|
@@ -530,51 +545,164 @@ void Lowering::AfterLowerBlock() | |
| // At this point, we'll have to modify the IR in some way. In general, these cases should be quite | ||
| // rare, introduced in lowering only. All HIR-induced cases (such as from "gtSetEvalOrder") should | ||
| // instead be ifdef-ed out for WASM. | ||
| m_anyChanges = true; | ||
|
|
||
| // Invariant nodes can be safely moved by the stackifier with no side effects. | ||
| // For other nodes, the side effects would require us to turn them into a temporary local, but this | ||
| // is not possible for contained nodes like an IND inside a STORE_BLK. However, the few types of | ||
| // contained nodes we have in Wasm should be safe to move freely since the lack of 'dup' or | ||
| // persistent registers in Wasm means that the actual codegen will trigger the side effect(s) and | ||
| // store the result into a Wasm local for any later uses during the containing node's execution, | ||
| // i.e. cpobj where the src and dest get stashed at the start and then used as add operands | ||
| // repeatedly. | ||
| // Locals can also be safely moved as long as they aren't address-exposed due to local var nodes | ||
| // being implicitly pseudo-contained. | ||
| // TODO-WASM: Verify that it is actually safe to do this for all contained nodes. | ||
| if (CanMoveNodePast(node, prev->gtNext)) | ||
| INDEBUG(const char* reason); | ||
| if (CanMoveForward(node DEBUGARG(&reason))) | ||
| { | ||
| JITDUMP("Stackifier moving node [%06u] after [%06u]\n", Compiler::dspTreeID(node), | ||
| Compiler::dspTreeID(prev)); | ||
| m_lower->BlockRange().Remove(node); | ||
| m_lower->BlockRange().InsertAfter(prev, node); | ||
| break; | ||
| MoveForward(node, prev DEBUGARG(reason)); | ||
| } | ||
|
|
||
| JITDUMP("node==[%06u] prev==[%06u]\n", Compiler::dspTreeID(node), Compiler::dspTreeID(prev)); | ||
| NYI_WASM("IR not in a stackified form"); | ||
| else | ||
| { | ||
| node = ReplaceWithTemporary(use, prev); | ||
| } | ||
| m_anyChanges = true; | ||
| break; | ||
| } | ||
|
|
||
| // In stack order, the last operand is closest to its parent, thus put on top here. | ||
| node->VisitOperands([stack](GenTree* operand) { | ||
| stack->Push(operand); | ||
| node->VisitOperandUses([this](GenTree** use) { | ||
| m_stack.Push(use); | ||
| return GenTree::VisitResult::Continue; | ||
| }); | ||
| current = node; | ||
| lastStackified = node; | ||
| } | ||
|
|
||
| return current->gtPrev; | ||
| return lastStackified->gtPrev; | ||
| } | ||
|
|
||
| bool IsDataFlowRoot(GenTree* node) | ||
| { | ||
| return !node->IsValue() || node->IsUnusedValue(); | ||
| } | ||
|
|
||
| bool CanMoveForward(GenTree* node DEBUGARG(const char** pReason)) | ||
| { | ||
| if (node->IsInvariant()) | ||
| { | ||
| // Leaf node without control or dataflow dependencies. | ||
| INDEBUG(*pReason = "invariant"); | ||
| return true; | ||
| } | ||
|
|
||
| if (node->isContained()) | ||
| { | ||
| // Contained nodes are part of their parent so their position in the LIR stream is not significant. | ||
| // As a fiction that simplifies this algorithm, we move them to the place where they would be were | ||
| // they not contained. | ||
| INDEBUG(*pReason = "contained"); | ||
| return true; | ||
| } | ||
|
|
||
| if (node->OperIs(GT_LCL_VAR) && !m_compiler->lvaGetDesc(node->AsLclVarCommon())->IsAddressExposed()) | ||
| { | ||
| // By IR invariants, there can be no intervening stores between a local's position in the LIR stream | ||
| // and its parent. So we can always move a local forward, closer to its parent. | ||
| INDEBUG(*pReason = "local"); | ||
| return true; | ||
| } | ||
|
|
||
| // TODO-WASM: devise a less-than-quadratic (ideally linear) algorithm that would allow us to handle more | ||
| // complex cases here. | ||
| return false; | ||
| } | ||
|
|
||
| void MoveForward(GenTree* node, GenTree* prev DEBUGARG(const char* reason)) | ||
| { | ||
| JITDUMP("Stackifier moving [%06u] after [%06u]: %s\n", Compiler::dspTreeID(node), Compiler::dspTreeID(prev), | ||
| reason); | ||
| assert(m_lower->IsInvariantInRange(node, prev->gtNext)); | ||
| m_lower->BlockRange().Remove(node); | ||
| m_lower->BlockRange().InsertAfter(prev, node); | ||
| } | ||
|
|
||
| GenTree* ReplaceWithTemporary(GenTree** use, GenTree* prev) | ||
| { | ||
| GenTree* node = *use; | ||
| unsigned lclNum = RequestTemporary(node->TypeGet()); | ||
| GenTree* lclStore = m_compiler->gtNewStoreLclVarNode(lclNum, node); | ||
| GenTree* lclNode = m_compiler->gtNewLclVarNode(lclNum); | ||
|
|
||
| m_lower->BlockRange().InsertAfter(node, lclStore); | ||
| m_lower->BlockRange().InsertAfter(prev, lclNode); | ||
| *use = lclNode; | ||
|
|
||
| JITDUMP("Replaced [%06u] with a temporary:\n", Compiler::dspTreeID(node)); | ||
| DISPNODE(node); | ||
| DISPNODE(lclNode); | ||
| return lclNode; | ||
| } | ||
|
|
||
| unsigned RequestTemporary(var_types type) | ||
| { | ||
| assert(varTypeIsEnregisterable(type)); | ||
|
|
||
| unsigned lclNum; | ||
| unsigned index = static_cast<unsigned>(ActualTypeToWasmValueType(type)); | ||
| Temporary* local = Remove(&m_availableTemps[index]); | ||
| if (local != nullptr) | ||
| { | ||
| lclNum = local->LclNum; | ||
| Append(&m_unusedTemps[index], local); | ||
| assert(m_compiler->lvaGetDesc(lclNum)->TypeGet() == genActualType(type)); | ||
| } | ||
| else | ||
| { | ||
| lclNum = m_compiler->lvaGrabTemp(true DEBUGARG("Stackifier temporary")); | ||
| LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); | ||
| varDsc->lvType = genActualType(type); | ||
| assert(lclNum >= m_minimumTempLclNum); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Seems a bit odd to me to defer creating a |
||
| } | ||
| JITDUMP("Temporary V%02u is now in use\n", lclNum); | ||
| return lclNum; | ||
| } | ||
|
|
||
| void ReleaseTemporariesDefinedBy(GenTree* node) | ||
| { | ||
| assert(IsDataFlowRoot(node)); // We rely on the node not moving after this call. | ||
| if (!node->OperIs(GT_STORE_LCL_VAR)) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't understand this check. Can't we see reordering under other top level node types? |
||
| { | ||
| return; | ||
| } | ||
|
|
||
| unsigned lclNum = node->AsLclVar()->GetLclNum(); | ||
| if (lclNum < m_minimumTempLclNum) | ||
| { | ||
| return; | ||
| } | ||
|
|
||
| unsigned index = static_cast<unsigned>(ActualTypeToWasmValueType(node->TypeGet())); | ||
| Temporary* local = Remove(&m_unusedTemps[index]); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we didn't defer above we could assert here instead of allocating a new temporary. |
||
| if (local == nullptr) | ||
| { | ||
| local = new (m_compiler, CMK_Lower) Temporary(); | ||
| } | ||
| local->LclNum = lclNum; | ||
|
|
||
| JITDUMP("Temporary V%02u is now free and can be re-used\n", lclNum); | ||
| Append(&m_availableTemps[index], local); | ||
| } | ||
|
|
||
| Temporary* Remove(Temporary** pTemps) | ||
| { | ||
| Temporary* local = *pTemps; | ||
| if (local != nullptr) | ||
| { | ||
| *pTemps = local->Prev; | ||
| } | ||
| return local; | ||
| } | ||
|
|
||
| void Append(Temporary** pTemps, Temporary* local) | ||
| { | ||
| local->Prev = *pTemps; | ||
| *pTemps = local; | ||
| } | ||
| }; | ||
|
|
||
| Stackifier stackifier(this); | ||
| stackifier.StackifyCurrentBlock(); | ||
| for (BasicBlock* block : m_compiler->Blocks()) | ||
| { | ||
| stackifier.StackifyBlock(block); | ||
| } | ||
| } | ||
|
|
||
| //------------------------------------------------------------------------ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
Naming:
m_inUseTemps?