//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// /// /// \file /// This file implements a pass that transforms irreducible control flow into /// reducible control flow. Irreducible control flow means multiple-entry /// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo /// due to being unnatural. /// /// Note that LLVM has a generic pass that lowers irreducible control flow, but /// it linearizes control flow, turning diamonds into two triangles, which is /// both unnecessary and undesirable for WebAssembly. /// /// The big picture: Ignoring natural loops (seeing them monolithically), we /// find all the blocks which can return to themselves ("loopers"). Loopers /// reachable from the non-loopers are loop entries: if there are 2 or more, /// then we have irreducible control flow. We fix that as follows: a new block /// is created that can dispatch to each of the loop entries, based on the /// value of a label "helper" variable, and we replace direct branches to the /// entries with assignments to the label variable and a branch to the dispatch /// block. Then the dispatch block is the single entry in a new natural loop. /// /// This is similar to what the Relooper [1] does, both identify looping code /// that requires multiple entries, and resolve it in a similar way. In /// Relooper terminology, we implement a Multiple shape in a Loop shape. Note /// also that like the Relooper, we implement a "minimal" intervention: we only /// use the "label" helper for the blocks we absolutely must and no others. We /// also prioritize code size and do not perform node splitting (i.e. we don't /// duplicate code in order to resolve irreducibility). /// /// The difference between this code and the Relooper is that the Relooper also /// generates ifs and loops and works in a recursive manner, knowing at each /// point what the entries are, and recursively breaks down the problem. Here /// we just want to resolve irreducible control flow, and we also want to use /// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to /// identify natural loops, etc., and we start with the whole CFG and must /// identify both the looping code and its entries. /// /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In /// Proceedings of the ACM international conference companion on Object oriented /// programming systems languages and applications companion (SPLASH '11). ACM, /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224 /// http://doi.acm.org/10.1145/2048147.2048224 /// //===----------------------------------------------------------------------===// #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "wasm-fix-irreducible-control-flow" namespace { class LoopFixer { public: LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop) : MF(MF), MLI(MLI), Loop(Loop) {} // Run the fixer on the given inputs. Returns whether changes were made. bool run(); private: MachineFunction &MF; MachineLoopInfo &MLI; MachineLoop *Loop; MachineBasicBlock *Header; SmallPtrSet LoopBlocks; using BlockSet = SmallPtrSet; DenseMap Reachable; // The worklist contains pairs of recent additions, (a, b), where we just // added a link a => b. using BlockPair = std::pair; SmallVector WorkList; // Get a canonical block to represent a block or a loop: the block, or if in // an inner loop, the loop header, of it in an outer loop scope, we can // ignore it. We need to call this on all blocks we work on. MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) { MachineLoop *InnerLoop = MLI.getLoopFor(MBB); if (InnerLoop == Loop) { return MBB; } else { // This is either in an outer or an inner loop, and not in ours. if (!LoopBlocks.count(MBB)) { // It's in outer code, ignore it. return nullptr; } assert(InnerLoop); // It's in an inner loop, canonicalize it to the header of that loop. return InnerLoop->getHeader(); } } // For a successor we can additionally ignore it if it's a branch back to a // natural loop top, as when we are in the scope of a loop, we just care // about internal irreducibility, and can ignore the loop we are in. We need // to call this on all blocks in a context where they are a successor. MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) { if (Loop && MBB == Loop->getHeader()) { // Ignore branches going to the loop's natural header. return nullptr; } return canonicalize(MBB); } // Potentially insert a new reachable edge, and if so, note it as further // work. void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { assert(MBB == canonicalize(MBB)); assert(Succ); // Succ may not be interesting as a sucessor. Succ = canonicalizeSuccessor(Succ); if (!Succ) return; if (Reachable[MBB].insert(Succ).second) { // For there to be further work, it means that we have // X => MBB => Succ // for some other X, and in that case X => Succ would be a new edge for // us to discover later. However, if we don't care about MBB as a // successor, then we don't care about that anyhow. if (canonicalizeSuccessor(MBB)) { WorkList.emplace_back(MBB, Succ); } } } }; bool LoopFixer::run() { Header = Loop ? Loop->getHeader() : &*MF.begin(); // Identify all the blocks in this loop scope. if (Loop) { for (auto *MBB : Loop->getBlocks()) { LoopBlocks.insert(MBB); } } else { for (auto &MBB : MF) { LoopBlocks.insert(&MBB); } } // Compute which (canonicalized) blocks each block can reach. // Add all the initial work. for (auto *MBB : LoopBlocks) { MachineLoop *InnerLoop = MLI.getLoopFor(MBB); if (InnerLoop == Loop) { for (auto *Succ : MBB->successors()) { maybeInsert(MBB, Succ); } } else { // It can't be in an outer loop - we loop on LoopBlocks - and so it must // be an inner loop. assert(InnerLoop); // Check if we are the canonical block for this loop. if (canonicalize(MBB) != MBB) { continue; } // The successors are those of the loop. SmallVector ExitBlocks; InnerLoop->getExitBlocks(ExitBlocks); for (auto *Succ : ExitBlocks) { maybeInsert(MBB, Succ); } } } // Do work until we are all done. while (!WorkList.empty()) { MachineBasicBlock *MBB; MachineBasicBlock *Succ; std::tie(MBB, Succ) = WorkList.pop_back_val(); // The worklist item is an edge we just added, so it must have valid blocks // (and not something canonicalized to nullptr). assert(MBB); assert(Succ); // The successor in that pair must also be a valid successor. assert(MBB == canonicalizeSuccessor(MBB)); // We recently added MBB => Succ, and that means we may have enabled // Pred => MBB => Succ. Check all the predecessors. Note that our loop here // is correct for both a block and a block representing a loop, as the loop // is natural and so the predecessors are all predecessors of the loop // header, which is the block we have here. for (auto *Pred : MBB->predecessors()) { // Canonicalize, make sure it's relevant, and check it's not the same // block (an update to the block itself doesn't help compute that same // block). Pred = canonicalize(Pred); if (Pred && Pred != MBB) { maybeInsert(Pred, Succ); } } } // It's now trivial to identify the loopers. SmallPtrSet Loopers; for (auto MBB : LoopBlocks) { if (Reachable[MBB].count(MBB)) { Loopers.insert(MBB); } } // The header cannot be a looper. At the toplevel, LLVM does not allow the // entry to be in a loop, and in a natural loop we should ignore the header. assert(Loopers.count(Header) == 0); // Find the entries, loopers reachable from non-loopers. SmallPtrSet Entries; SmallVector SortedEntries; for (auto *Looper : Loopers) { for (auto *Pred : Looper->predecessors()) { Pred = canonicalize(Pred); if (Pred && !Loopers.count(Pred)) { Entries.insert(Looper); SortedEntries.push_back(Looper); break; } } } // Check if we found irreducible control flow. if (LLVM_LIKELY(Entries.size() <= 1)) return false; // Sort the entries to ensure a deterministic build. llvm::sort(SortedEntries, [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { auto ANum = A->getNumber(); auto BNum = B->getNumber(); return ANum < BNum; }); #ifndef NDEBUG for (auto Block : SortedEntries) assert(Block->getNumber() != -1); if (SortedEntries.size() > 1) { for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E; ++I) { auto ANum = (*I)->getNumber(); auto BNum = (*(std::next(I)))->getNumber(); assert(ANum != BNum); } } #endif // Create a dispatch block which will contain a jump table to the entries. MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock(); MF.insert(MF.end(), Dispatch); MLI.changeLoopFor(Dispatch, Loop); // Add the jump table. const auto &TII = *MF.getSubtarget().getInstrInfo(); MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32)); // Add the register which will be used to tell the jump table which block to // jump to. MachineRegisterInfo &MRI = MF.getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MIB.addReg(Reg); // Compute the indices in the superheader, one for each bad block, and // add them as successors. DenseMap Indices; for (auto *MBB : SortedEntries) { auto Pair = Indices.insert(std::make_pair(MBB, 0)); if (!Pair.second) { continue; } unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1; Pair.first->second = Index; MIB.addMBB(MBB); Dispatch->addSuccessor(MBB); } // Rewrite the problematic successors for every block that wants to reach the // bad blocks. For simplicity, we just introduce a new block for every edge // we need to rewrite. (Fancier things are possible.) SmallVector AllPreds; for (auto *MBB : SortedEntries) { for (auto *Pred : MBB->predecessors()) { if (Pred != Dispatch) { AllPreds.push_back(Pred); } } } for (MachineBasicBlock *MBB : AllPreds) { DenseMap Map; for (auto *Succ : MBB->successors()) { if (!Entries.count(Succ)) { continue; } // This is a successor we need to rewrite. MachineBasicBlock *Split = MF.CreateMachineBasicBlock(); MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ) : MF.end(), Split); MLI.changeLoopFor(Split, Loop); // Set the jump table's register of the index of the block we wish to // jump to, and jump to the jump table. BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg) .addImm(Indices[Succ]); BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR)) .addMBB(Dispatch); Split->addSuccessor(Dispatch); Map[Succ] = Split; } // Remap the terminator operands and the successor list. for (MachineInstr &Term : MBB->terminators()) for (auto &Op : Term.explicit_uses()) if (Op.isMBB() && Indices.count(Op.getMBB())) Op.setMBB(Map[Op.getMBB()]); for (auto Rewrite : Map) MBB->replaceSuccessor(Rewrite.first, Rewrite.second); } // Create a fake default label, because br_table requires one. MIB.addMBB(MIB.getInstr() ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) .getMBB()); return true; } class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { StringRef getPassName() const override { return "WebAssembly Fix Irreducible Control Flow"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) override; bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) { // Visit the function body, which is identified as a null loop. if (LoopFixer(MF, MLI, nullptr).run()) { return true; } // Visit all the loops. SmallVector Worklist(MLI.begin(), MLI.end()); while (!Worklist.empty()) { MachineLoop *Loop = Worklist.pop_back_val(); Worklist.append(Loop->begin(), Loop->end()); if (LoopFixer(MF, MLI, Loop).run()) { return true; } } return false; } public: static char ID; // Pass identification, replacement for typeid WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {} }; } // end anonymous namespace char WebAssemblyFixIrreducibleControlFlow::ID = 0; INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE, "Removes irreducible control flow", false, false) FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { return new WebAssemblyFixIrreducibleControlFlow(); } bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( MachineFunction &MF) { LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" "********** Function: " << MF.getName() << '\n'); bool Changed = false; auto &MLI = getAnalysis(); // When we modify something, bail out and recompute MLI, then start again, as // we create a new natural loop when we resolve irreducible control flow, and // other loops may become nested in it, etc. In practice this is not an issue // because irreducible control flow is rare, only very few cycles are needed // here. while (LLVM_UNLIKELY(runIteration(MF, MLI))) { // We rewrote part of the function; recompute MLI and start again. LLVM_DEBUG(dbgs() << "Recomputing loops.\n"); MF.getRegInfo().invalidateLiveness(); MF.RenumberBlocks(); getAnalysis().runOnMachineFunction(MF); MLI.runOnMachineFunction(MF); Changed = true; } return Changed; }