手写一个llvm的mem2reg pass
基于支配树的alloca2reg
一些基本概念
CFG控制流图
函数是由一系列基本块以及基本块的跳转关系组成的,基本块与基本块的跳转关系,组成了CFG
控制流图。
int f() {
int i = 0;
int count = 0;
for(; i < 10 ; i++) {
count++;
}
return 0;
}
其生成的中间代码大致为
def i32 @f() {
Entry:
%i = alloca i32, align 8
%count = alloca i32, align 8
store i32 0 ptr %i, align 8
store i32 0 ptr %count align 8
br Cond
Cond:
%1 = load i32 ptr %i
%2 = %1 < 10
br i1 %2 Body, After
Body:
%3 = load i32 ptr %count
%4 = add nsw i32 %3 i32 1
store i32 %4 ptr %count
%5 = load i32 ptr %i
%6 = add nsw i32 %5, i32 1
store i32 %6 ptr %i
br Cond:
After:
ret i32 0;
}
这个控制流图为
在上面的控制流图中,Entry
是入口节点, Cond
的前驱节点为Entry
, Cond
的后继节点为Body
和After
, 而且可以看到Body
的前驱节点为Cond
,后继节点也是Cond
, 这个是允许存在的。为了简化后面的表示,用prev(X)
来代表X的前驱节点,用succ(X)
来代表X的后继节点。
支配树相关的概念
- 基本块A**支配(dominate)**基本块B, 表示从控制流图顶层节点出发到基本块B的路径中,必定经过基本块A, 那么就称基本块A支配基本块B。数学表示为 A 支配 B A支配B A支配B或者 A d o m B A\quad dom \quad B AdomB
- 基本块A**直接支配(immediate dominate)**基本块B,表示基本块A支配基本块B,并且基本块A不等于B,那么称基本块A直接支配基本块B。数学表示为 A 直接支配 B A直接支配B A直接支配B或者 A i d o m B A \quad idom \quad B AidomB
- 支配边界(dominant boundary),基本块A的支配边界,称为DF(A), 它是一个集合,对于**DF(A)**中的任意元素X,满足A支配X的前驱节点,但是A不支配X。数学表示为 D F ( A ) = { X ∣ A 支配 X 的前驱节点,且 A 不支配 X } DF(A) = \{X|A支配X的前驱节点,且 A不支配X\} DF(A)={X∣A支配X的前驱节点,且A不支配X}
- 支配树由一个根节点(CFG入口节点)和一系列的子节点组成的一个由支配关系构建的树,每一个节点的子节点都是被父节点直接支配,比如上图,其支配树为
alloca2reg算法
像
alloca
,load
以及store
这类的指令,都是要访存的,在现代处理器中,CPU访存的速度比读取寄存器的速度要慢很多,所以alloca2reg
的目的就是尽量将在内存上的操作变成在寄存器上的操作,以此来提升程序运行速度。
实现alloca2reg, 需要有几个步骤。
- 首先我们需要知道
CFG
的支配关系,关于支配树的构建算法,有现成的论文可以参考,比如说节点删除法, Lengauer-Tarjan算法这里不赘述,我们使用现成的求解算法,llvm
中DominatorTree在llvm/IR/Dominators.h中
提供了计算支配树的算法,其提供的API
如下。
函数 | 描述 |
---|---|
recalculate(ParentTypen& F) | 支配树, 这里参数可以是Function |
operator[] | 获取某个基本块的支配信息 |
getIDom() | 获取某个基本块的直接支配者 |
getBlock() | 或缺这个支配节点的基本块 |
使用方法
#include "llvm/IR/Dominatore.h"
DominatorTree DT;
/// 计算支配树
DT.recalculate(F);
/// 获取某个基本块的直接支配者
BasicBlock* idomBB = DT[BB]->getIDom()->getBlock();
- 在获取基本块的支配关系之后(即拿到支配树之后)然后就是需要知道每一个基本块的支配边界,因为PHI节点的插入需要有支配边界的信息。支配边界的计算算法如下。
/// 算法伪代码
/// DF(X)代表基本快X的支配边界
/// for B in F
/// DF(B) = {}
/// for B in F {
/// if B 有多个前驱节点 {
/// for(runner in prev(B)) {
/// runner = p
/// while(runner != idom(B)) {
/// DF(runner) = DF(runner) U {B}
/// runner = idom(runner)
/// }
/// }
/// }
/// }
void calcuDf(Function& F) {
for(BasicBlock& BB : F) {
DomFsBlock[&BB] = {};
}
BasicBlock *prevBB, *idomBB;
for(BasicBlock& BB : F) {
if(pred_size(&BB) > 1) {
idomBB = DT[&BB]->getIDom()->getBlock();
for(auto prev : predecessors(&BB)) {
prevBB = dyn_cast<BasicBlock>(prev);
while(prevBB != idomBB) {
DomFsBlock[prevBB].push_back(&BB);
prevBB = DT[prevBB]->getIDom()->getBlock();
}
}
}
}
}
-
在得到支配边界之后,还需要一些其他的工作,比如收集
promoted
的alloca
指令,以及他们定义和使用的地方 -
在有了这些信息之后,下面就是插入PHI节点了。
/// 算法伪代码
/// for a in allocas
/// W = {}
/// F = {}
/// for bb in Def[a]
/// W = W U {bb}
/// while W not empty
/// X = W.back()
/// W.removeback()
/// for Y in DF(X)
/// if(Y not in F)
/// add phi(a) to Y
/// F = F u {Y}
/// 具体C++代码实现
for(AllocaInst* alloca : Allocas) {
PhiSet.clear();
W.clear();
phi = nullptr;
for (BasicBlock* BB : DefsBlock[alloca]) {
W.push_back(BB);
}
while(!W.empty()) {
X = W.back();
W.pop_back();
for(BasicBlock* Y : DomFsBlock[X]) {
if(PhiSet.find(Y) == PhiSet.end()) {
phi = PHINode::Create(alloca->getAllocatedType(), 0);
phi->insertBefore(dyn_cast<Instruction>(Y->begin()));
PhiSet.insert(Y);
PhiMap[Y].insert({phi, alloca});
if(std::find(W.begin(), W.end(), Y) == W.end())
W.push_back(Y);
}
}
}
}
- 在插入
phi
节点之后,下面就是填充phi
节点了, 即rename
, 其伪代码为
WorkList = {}
IncommingValue = Null;
for(alloca in Allocas)
IncommingValue[alloca] = Undef
WorkList.add({EntryBB, IncommingValue})
while WorkList not empty
BB = WorkList.back()[0]
IncommingValue = WorkList.back()[1]
WorkList remove back
for inst in BB
if inst is load
if load's var in Allocas
remove load
replace load's user with IncommingValue[load's var]
else if inst is store
if store's var in Allocas
remove store
IncommingValue[store's var] = store's value
else if inst is phi
IncommingValue[phi's var] = phi
for succBB in succ(BB)
WorkList.add({succBB, IncommingValue})
for phi in succBB
phi.addIncommingValue(IncommingValue[phi's var], BB)
项目源代码
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/IR/CFG.h"
#include <set>
#include <cassert>
#include <map>
#include <vector>
using namespace llvm;
namespace {
struct Alloca2RegPass : public FunctionPass {
static char ID;
Alloca2RegPass() : FunctionPass(ID) {}
// std::vector
std::vector<AllocaInst*> Allocas;
std::map<AllocaInst*, std::set<BasicBlock*>> DefsBlock;
std::map<AllocaInst*, std::set<BasicBlock*>> UsesBlock;
std::map<BasicBlock*, std::map<PHINode*, AllocaInst*>> PhiMap;
std::map<AllocaInst*, Value*> ValueMap;
/// @brief 存储基本块的支配边界
std::map<BasicBlock*, std::vector<BasicBlock*>> DomFsBlock;
StoreInst *OnlyStore;
BasicBlock *OnlyBlock;
// 使用支配树来实现的alloca2reg版本
DominatorTree DT;
/// @brief 计算基本块的支配边界
/// @param BB
void calcuDf(Function& F) {
for(BasicBlock& BB : F) {
DomFsBlock[&BB] = {};
}
BasicBlock* pB, *prevBB, *idomBB;
for(BasicBlock& BB : F) {
if(pred_size(&BB) > 1) {
idomBB = DT[&BB]->getIDom()->getBlock();
for(auto prev : predecessors(&BB)) {
prevBB = dyn_cast<BasicBlock>(prev);
while(prevBB != idomBB) {
DomFsBlock[prevBB].push_back(&BB);
prevBB = DT[prevBB]->getIDom()->getBlock();
}
}
}
}
}
int getInstructionIndex(BasicBlock* BB, Instruction* I) {
int index = 0;
for(BasicBlock::iterator it = BB->begin(), end = --BB->end() ; it != end ; ++it) {
if(I == dyn_cast<Instruction>(it))
return index;
index++;
}
return -1;
}
bool rewriteSingleStoreAlloca(AllocaInst* alloca) {
bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
BasicBlock *StoreBB = OnlyStore->getParent();
int StoreIndex = -1;
UsesBlock[alloca].clear();
for (User *U : alloca->users()) {
Instruction *UserInst = cast<Instruction>(U);
if (UserInst == OnlyStore)
continue;
LoadInst *LI = cast<LoadInst>(UserInst);
if (!StoringGlobalVal) { // Non-instructions are always dominated.
if (LI->getParent() == StoreBB) {
if (StoreIndex == -1)
StoreIndex = getInstructionIndex(StoreBB, OnlyStore);
if (unsigned(StoreIndex) > getInstructionIndex(StoreBB, LI)) {
UsesBlock[alloca].insert(StoreBB);
continue;
}
} else if (!DT.dominates(StoreBB, LI->getParent())) {
UsesBlock[alloca].insert(LI->getParent());
continue;
}
}
Value *ReplVal = OnlyStore->getOperand(0);
LI->replaceAllUsesWith(ReplVal);
LI->eraseFromParent();
}
// Finally, after the scan, check to see if the store is all that is left.
if (UsesBlock[alloca].empty())
return false; // If not, we'll have to fall back for the remainder.
OnlyStore->eraseFromParent();
alloca->eraseFromParent();
return true;
}
void removeFromAllocaList(unsigned& AllocaIndex) {
Allocas[AllocaIndex] = Allocas.back();
Allocas.pop_back();
--AllocaIndex;
}
bool isAggregateType(llvm::Type* ty) {
return ty->isIntegerTy();
}
/// 这个函数用于判断alloca指令,是否只有load/store. 如果没有使用,或者只有load和store返回true, 否则返回false
bool isPromote(const AllocaInst* allocaInst) {
if(!isAggregateType(allocaInst->getAllocatedType()))
return false;
for(const User *user : allocaInst->users()) {
if(const LoadInst* load = dyn_cast<LoadInst>(user)) {
continue;
}
else if(const StoreInst* store = dyn_cast<StoreInst>(user)) {
continue;
}
else{
return false;
}
}
return true;
}
/// 收集promoted的alloca指令
void collectPromotedAllocas(Function& F) {
BasicBlock* BB;
Allocas.clear();
for(Function::iterator bbIt = F.begin(), bbEnd = F.end() ; bbIt != bbEnd ; ++bbIt) {
BB = &F.getEntryBlock();
for(BasicBlock::iterator instIt = BB->begin(), instEnd = --BB->end() ; instIt != instEnd ; ++instIt) {
if(AllocaInst* alloca = dyn_cast<AllocaInst>(instIt)) {
if(isPromote(alloca))
Allocas.push_back(alloca);
}
}
}
}
void clear() {
OnlyStore = nullptr;
}
/// 分析alloca指令,找出他定义的块和使用的块
void analysisAlloca(AllocaInst* alloca) {
for(User* u : alloca->users()) {
Instruction* i = dyn_cast<Instruction>(u);
if (StoreInst *SI = dyn_cast<StoreInst>(i)) {
DefsBlock[alloca].insert(SI->getParent());
OnlyStore = SI;
} else {
LoadInst *LI = cast<LoadInst>(i);
UsesBlock[alloca].insert(LI->getParent());
}
}
}
/// @brief 执行alloca2reg
/// @param F
void executeMem2Reg(Function& F) {
/// step1 删除掉没有使用的alloca指令
/// @example int getValue() {
/// int a; // this value will be remove
/// return 10;
/// }
///
for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
AllocaInst *AI = Allocas[AllocaNum];
// 如果当前alloca没有user,删除掉这条alloca指令
if (AI->use_empty()) {
AI->eraseFromParent();
removeFromAllocaList(AllocaNum);
}
/// 分析alloca指令,寻找定义和使用他的地方
analysisAlloca(AI);
// 如果alloca只被定义了一次
// if(DefsBlock[AI].size() == 1) {
// if (rewriteSingleStoreAlloca(AI)) {
// removeFromAllocaList(AllocaNum);
// continue;
// }
// }
}
std::set<BasicBlock*> PhiSet;
std::vector<BasicBlock*> W;
PHINode* phi;
BasicBlock* X;
/// 插入phi节点
for(AllocaInst* alloca : Allocas) {
PhiSet.clear();
W.clear();
phi = nullptr;
for (BasicBlock* BB : DefsBlock[alloca]) {
W.push_back(BB);
}
while(!W.empty()) {
X = W.back();
W.pop_back();
for(BasicBlock* Y : DomFsBlock[X]) {
if(PhiSet.find(Y) == PhiSet.end()) {
phi = PHINode::Create(alloca->getAllocatedType(), 0);
phi->insertBefore(dyn_cast<Instruction>(Y->begin()));
PhiSet.insert(Y);
PhiMap[Y].insert({phi, alloca});
if(std::find(W.begin(), W.end(), Y) == W.end())
W.push_back(Y);
}
}
}
}
std::vector<Instruction*> InstCanBeRemoveList;
std::vector<std::pair<BasicBlock*, std::map<AllocaInst*, Value*>>> WorkList;
std::set<BasicBlock*> VisitedSet;
BasicBlock *SuccBB, *BB;
std::map<AllocaInst*, Value*> IncommingValues;
Instruction* Inst;
WorkList.push_back({&F.getEntryBlock(), {}});
for(AllocaInst* alloca : Allocas) {
WorkList[0].second[alloca] = UndefValue::get(alloca->getAllocatedType());
}
///> 遍历所有基本块,用phi节点替换掉那些没有使用到的基本块以及填充phi的imcoming value
while(!WorkList.empty()) {
BB = WorkList.back().first;
IncommingValues = WorkList.back().second;
WorkList.pop_back();
if(VisitedSet.find(BB) != VisitedSet.end())
continue;
else
VisitedSet.insert(BB);
for(BasicBlock::iterator it = BB->begin(), end = --BB->end() ; it != end ; ++it) {
Inst = dyn_cast<Instruction>(it);
if(AllocaInst* AI = dyn_cast<AllocaInst>(it)) {
if(std::find(Allocas.begin(), Allocas.end(), AI) == Allocas.end())
continue;
InstCanBeRemoveList.push_back(Inst);
}
else if(LoadInst* LI = dyn_cast<LoadInst>(it)) {
AllocaInst* AI = dyn_cast<AllocaInst>(LI->getPointerOperand());
if(!AI)
continue;
if(std::find(Allocas.begin(), Allocas.end(), AI) != Allocas.end()) {
if(IncommingValues.find(AI) == IncommingValues.end())
IncommingValues[AI] = UndefValue::get(AI->getAllocatedType());
LI->replaceAllUsesWith(IncommingValues[AI]);
InstCanBeRemoveList.push_back(Inst);
}
}
else if(StoreInst* SI = dyn_cast<StoreInst>(it)) {
AllocaInst* AI = dyn_cast<AllocaInst>(SI->getPointerOperand());
if(!AI)
continue;
if(std::find(Allocas.begin(), Allocas.end(), AI) == Allocas.end())
continue;
IncommingValues[AI] = SI->getValueOperand();
InstCanBeRemoveList.push_back(Inst);
}
else if(PHINode* Phi = dyn_cast<PHINode>(it)) {
if(PhiMap[BB].find(Phi) == PhiMap[BB].end())
continue;
IncommingValues[PhiMap[BB][Phi]] = Phi;
}
}
for(succ_iterator PI = succ_begin(BB), E = succ_end(BB); PI != E; ++PI) {
SuccBB = dyn_cast<BasicBlock>(*PI);
WorkList.push_back({SuccBB, IncommingValues});
for(BasicBlock::iterator it = SuccBB->begin(), end = --SuccBB->end() ; it != end ; ++it) {
if(PHINode* Phi = dyn_cast<PHINode>(it)) {
if(PhiMap[SuccBB].find(Phi) == PhiMap[SuccBB].end()) {
continue;
}
if(IncommingValues[PhiMap[SuccBB][Phi]]) {
Phi->addIncoming(IncommingValues[PhiMap[SuccBB][Phi]], BB);
}
}
}
}
}
/// 因为迭代器的原因,指令先存在一个vector中,然后处理完之后一次性删除
while(!InstCanBeRemoveList.empty()) {
Inst = InstCanBeRemoveList.back();
Inst->eraseFromParent();
InstCanBeRemoveList.pop_back();
}
/// 删除掉那些没有使用的phi指令
for(auto & it1 : PhiMap) {
for(auto &it : it1.second) {
if(it.first->use_empty())
it.first->eraseFromParent();
}
}
}
virtual bool runOnFunction(Function& F) {
while(true) {
errs() << "Working on function called " << F.getName() << "!\n";
collectPromotedAllocas(F);
if(Allocas.empty())
break;
DefsBlock.clear();
UsesBlock.clear();
PhiMap.clear();
DomFsBlock.clear();
/// 计算支配树
DT.recalculate(F);
/// 计算支配边界
calcuDf(F);
executeMem2Reg(F);
}
return true;
}
};
}
char Alloca2RegPass::ID = 0;
static RegisterPass<Alloca2RegPass> X("alloca2reg", "Alloca-to-register pass for minic", false, false);
// Automatically enable the pass.
// http://adriansampson.net/blog/clangpass.html
static void registerAlloca2RegPass(const PassManagerBuilder &,
legacy::PassManagerBase &PM) {
PM.add(new Alloca2RegPass());
}
static RegisterStandardPasses
RegisterMyPass(PassManagerBuilder::EP_EarlyAsPossible,
registerAlloca2RegPass);