代码混淆——控制流扁平化的开源实践和改进

孤挺花(Armariris): 由上海交通大学密码与计算机安全实验室维护的LLVM混淆框架,支持多平台、多语言。目前支持的功能主要有控制流混淆、指令替换、字符串加密。https://github.com/GoSSIP-SJTU/Armariris#armariris

本文主要对Armariris的控制流混淆实现代码进行分析,了解Armariris的实现方式。并对混淆强度进行部分改进。

下面先看下混淆前后的对比

混淆前

混淆后

可以看到CFG的基本块变多了,控制流也变得复杂了。

Armariris脱胎于OLLVM,两者都是采用在LLVM中增加一个pass的方式来实现对源码的混淆处理。由于编译出的混淆pass本身是以静态库的形式存在的,不适合我们查看输出后的IR文件,也不适合调试,所以第一步我先对LLVM的编译方式进行修改。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#add_llvm_library(LLVMObfuscation
# CryptoUtils.cpp
# StringObfuscation.cpp
# Substitution.cpp
# Flattening.cpp
# Utils.cpp
# )

#add_dependencies(LLVMObfuscation intrinsics_gen)

add_llvm_loadable_module( LLVMObfuscation
CryptoUtils.cpp
StringObfuscation.cpp
Substitution.cpp
Flattening.cpp
Utils.cpp

DEPENDS
intrinsics_gen
PLUGIN_TOOL
opt
)

这是Armariris/lib/Transforms/Obfuscation路径下的cmake文件修改情况,如此一来我们就可以通过opt来load生成的LLVMObfuscation,单独进行混淆,而不必完成整个编译过程。

当然除了这个文件还需要修改一些LLVMBuild.txt和PassManager的cpp,解除原来静态库的依赖关系。这里就不详说了。

修改完成后进行编译就可以在编译好的lib文件夹下发现LLVMObfuscation.dylib,然后我们准备一个样例文件,long.cpp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
// Sample input (long.cpp) fed to the flattening pass. It mixes straight-line
// integer arithmetic with one if-block and one conditional expression so that
// the function is made of several basic blocks. None of the computed values
// is used for output; the function always returns 0.
int main() 
{
int Tom;
int Jerry;
int Dog;

Jerry = 7;
Dog = 5;
Tom = Jerry ^ 13;

// The if-block introduces a conditional branch out of the first basic block.
if ( Jerry > Tom ) {
int Cat;
int Mouse;
int Dog_;

Cat = Jerry;
Mouse = 0;
Cat |= 5;
Mouse |= 6;
Cat += Mouse;
// The conditional expression adds a second two-way branch inside the if-block.
Dog_ = Tom > Cat ? Cat : Mouse;
Cat = Dog_ + Mouse;
}

// Reference bound to Dog; it is never read afterwards.
int &eee = Dog;

// Long run of sequential statements with no branching in between.
Tom = 3;
Tom ^= (Dog + Jerry);
Dog = Tom + Jerry;
Dog += 3;
Tom = Jerry | Dog;

return 0;
}

在里面填入简单的代码。

然后依次执行命令

1
2
3
4
clang -S -emit-llvm long.cpp -o long.ll

./bin/opt -load ./lib/LLVMObfuscation.dylib -flattening long.ll -o long.bc --

第一步先用clang输出long.cpp的中间语言文件——long.ll

第二步通过opt加载LLVMObfuscation.dylib执行混淆,输出的是LLVM的bitcode文件,是二进制格式,通过相应的工具也可以转换成IR文件

1
llvm-dis long.bc -o long-ir.ll

然后我们查看混淆前的long.ll和混淆后的long-ir.ll有什么区别

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
; Function Attrs: noinline norecurse nounwind ssp uwtable
define i32 @main() #0 {
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32, align 4
%4 = alloca i32, align 4
%5 = alloca i32, align 4
%6 = alloca i32, align 4
%7 = alloca i32, align 4
%8 = alloca i32*, align 8
store i32 0, i32* %1, align 4
store i32 7, i32* %3, align 4
store i32 5, i32* %4, align 4
%9 = load i32, i32* %3, align 4
%10 = xor i32 %9, 13
store i32 %10, i32* %2, align 4
%11 = load i32, i32* %3, align 4
%12 = load i32, i32* %2, align 4
%13 = icmp sgt i32 %11, %12
br i1 %13, label %14, label %35

; <label>:14: ; preds = %0
%15 = load i32, i32* %3, align 4
store i32 %15, i32* %5, align 4
store i32 0, i32* %6, align 4
%16 = load i32, i32* %5, align 4
%17 = or i32 %16, 5
store i32 %17, i32* %5, align 4
%18 = load i32, i32* %6, align 4
%19 = or i32 %18, 6
store i32 %19, i32* %6, align 4
%20 = load i32, i32* %6, align 4
%21 = load i32, i32* %5, align 4
%22 = add nsw i32 %21, %20
store i32 %22, i32* %5, align 4
%23 = load i32, i32* %2, align 4
%24 = load i32, i32* %5, align 4
%25 = icmp sgt i32 %23, %24
br i1 %25, label %26, label %28

; <label>:26: ; preds = %14
%27 = load i32, i32* %5, align 4
br label %30

; <label>:28: ; preds = %14
%29 = load i32, i32* %6, align 4
br label %30

; <label>:30: ; preds = %28, %26
%31 = phi i32 [ %27, %26 ], [ %29, %28 ]
store i32 %31, i32* %7, align 4
%32 = load i32, i32* %7, align 4
%33 = load i32, i32* %6, align 4
%34 = add nsw i32 %32, %33
store i32 %34, i32* %5, align 4
br label %35

; <label>:35: ; preds = %30, %0
store i32* %4, i32** %8, align 8
store i32 3, i32* %2, align 4
%36 = load i32, i32* %4, align 4
%37 = load i32, i32* %3, align 4
%38 = add nsw i32 %36, %37
%39 = load i32, i32* %2, align 4
%40 = xor i32 %39, %38
store i32 %40, i32* %2, align 4
%41 = load i32, i32* %2, align 4
%42 = load i32, i32* %3, align 4
%43 = add nsw i32 %41, %42
store i32 %43, i32* %4, align 4
%44 = load i32, i32* %4, align 4
%45 = add nsw i32 %44, 3
store i32 %45, i32* %4, align 4
%46 = load i32, i32* %3, align 4
%47 = load i32, i32* %4, align 4
%48 = or i32 %46, %47
store i32 %48, i32* %2, align 4
ret i32 0
}

long.ll

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
; Function Attrs: noinline norecurse nounwind ssp uwtable
define i32 @main() #0 {
%.reg2mem2 = alloca i32
%.reg2mem = alloca i32
%1 = alloca i32, align 4
%2 = alloca i32, align 4
%3 = alloca i32, align 4
%4 = alloca i32, align 4
%5 = alloca i32, align 4
%6 = alloca i32, align 4
%7 = alloca i32, align 4
%8 = alloca i32*, align 8
store i32 0, i32* %1, align 4
store i32 7, i32* %3, align 4
store i32 5, i32* %4, align 4
%9 = load i32, i32* %3, align 4
%10 = xor i32 %9, 13
store i32 %10, i32* %2, align 4
%11 = load i32, i32* %3, align 4
store i32 %11, i32* %.reg2mem
%12 = load i32, i32* %2, align 4
store i32 %12, i32* %.reg2mem2
%switchVar = alloca i32
store i32 -1905808842, i32* %switchVar
%.reg2mem4 = alloca i32
br label %loopEntry

loopEntry: ; preds = %0, %loopEnd
%switchVar1 = load i32, i32* %switchVar
switch i32 %switchVar1, label %switchDefault [
i32 -1905808842, label %first
i32 985277384, label %15
i32 -162188989, label %28
i32 -1992348767, label %30
i32 -705746547, label %32
i32 -1653362465, label %36
]

switchDefault: ; preds = %loopEntry
br label %loopEnd

first: ; preds = %loopEntry
%.reload = load volatile i32, i32* %.reg2mem
%.reload3 = load volatile i32, i32* %.reg2mem2
%13 = icmp sgt i32 %.reload, %.reload3
%14 = select i1 %13, i32 985277384, i32 -1653362465
store i32 %14, i32* %switchVar
br label %loopEnd

; <label>:15: ; preds = %loopEntry
%16 = load i32, i32* %3, align 4
store i32 %16, i32* %5, align 4
store i32 0, i32* %6, align 4
%17 = load i32, i32* %5, align 4
%18 = or i32 %17, 5
store i32 %18, i32* %5, align 4
%19 = load i32, i32* %6, align 4
%20 = or i32 %19, 6
store i32 %20, i32* %6, align 4
%21 = load i32, i32* %6, align 4
%22 = load i32, i32* %5, align 4
%23 = add nsw i32 %22, %21
store i32 %23, i32* %5, align 4
%24 = load i32, i32* %2, align 4
%25 = load i32, i32* %5, align 4
%26 = icmp sgt i32 %24, %25
%27 = select i1 %26, i32 -162188989, i32 -1992348767
store i32 %27, i32* %switchVar
br label %loopEnd

; <label>:28: ; preds = %loopEntry
%29 = load i32, i32* %5, align 4
store i32 -705746547, i32* %switchVar
store i32 %29, i32* %.reg2mem4
br label %loopEnd

; <label>:30: ; preds = %loopEntry
%31 = load i32, i32* %6, align 4
store i32 -705746547, i32* %switchVar
store i32 %31, i32* %.reg2mem4
br label %loopEnd

; <label>:32: ; preds = %loopEntry
%.reload5 = load i32, i32* %.reg2mem4
store i32 %.reload5, i32* %7, align 4
%33 = load i32, i32* %7, align 4
%34 = load i32, i32* %6, align 4
%35 = add nsw i32 %33, %34
store i32 %35, i32* %5, align 4
store i32 -1653362465, i32* %switchVar
br label %loopEnd

; <label>:36: ; preds = %loopEntry
store i32* %4, i32** %8, align 8
store i32 3, i32* %2, align 4
%37 = load i32, i32* %4, align 4
%38 = load i32, i32* %3, align 4
%39 = add nsw i32 %37, %38
%40 = load i32, i32* %2, align 4
%41 = xor i32 %40, %39
store i32 %41, i32* %2, align 4
%42 = load i32, i32* %2, align 4
%43 = load i32, i32* %3, align 4
%44 = add nsw i32 %42, %43
store i32 %44, i32* %4, align 4
%45 = load i32, i32* %4, align 4
%46 = add nsw i32 %45, 3
store i32 %46, i32* %4, align 4
%47 = load i32, i32* %3, align 4
%48 = load i32, i32* %4, align 4
%49 = or i32 %47, %48
store i32 %49, i32* %2, align 4
ret i32 0

loopEnd: ; preds = %32, %30, %28, %15, %first, %switchDefault
br label %loopEntry
}

long-ir.ll

下面我们来分析Armariris的代码,看看它是如何实现的。控制流扁平化的伪算法在前一篇中已经介绍过了。当然由于实现方式方法有小小的不同,过程会略有差异,但是目标和大致过程都是相同的。

首先是入口函数

1
2
3
4
5
6
7
8
9
10
11
12
13
// Per-function entry point of the Flattening pass: decides whether F should be
// flattened and, if so, hands it to flatten().
bool Flattening::runOnFunction(Function &F) {
Function *tmp = &F;

// Flatten only when toObfuscate() accepts the function for the "fla" option
// and the value obtained from cryptoutils->get_range(100) is <= Percentage.
if (toObfuscate(flag, tmp, "fla") && ((int) llvm::cryptoutils->get_range(100) <= Percentage)) {
//errs() << "fla " + F.getName() +"\n";
if (flatten(tmp)) {
// Count one more function for which flatten() reported success.
++Flattened;
}
}

// NOTE(review): false is returned even when flatten() succeeded. A
// FunctionPass is normally expected to return true when it modified the
// function — confirm whether this is intentional.
return false;
}

Flattening是执行混淆的主要类,我们在混淆命令中加入的 -flattening 就是为了指定调用Flattening。

为什么说runOnFunction是入口函数呢,这是由Pass的调用过程决定的。首先,opt会生成PassManager。PassManager有一个成员PM

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/// PassManager manages ModulePassManagers
class PassManager : public PassManagerBase {
public:

PassManager();
~PassManager() override;

void add(Pass *P) override;

/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool run(Module &M);

private:
/// PassManagerImpl is the actual class. PassManager is just the
/// wrapper to publish simple pass manager interface
PassManagerImpl *PM;
};

PM是PassManagerImpl类型,是主要的执行类,PassManager通过PM内部的run函数启动PassManagerImpl。

1
2
3
4
for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
Changed |= getContainedManager(Index)->runOnModule(M);
M.getContext().yield();
}

PassManagerImpl会选择相对应的内容管理者。getContainedManager返回ModulePass的管理者MPPassManager(FunctionPass的管理者FPPassManager本身也是一个ModulePass,由MPPassManager调度)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
MPPassManager::runOnModule(Module &M) {
bool Changed = false;

...

// Initialize module passes
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
Changed |= getContainedPass(Index)->doInitialization(M);

for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
ModulePass *MP = getContainedPass(Index);
...
{
PassManagerPrettyStackEntry X(MP, M);
TimeRegion PassTimer(getPassTimer(MP));

LocalChanged |= MP->runOnModule(M);
}
...

通过多态,MP选择FPPassManager::runOnModule进入下一步

1
2
3
4
5
6
7
8
// Calls runOnFunction() on every function in M and returns true if any of
// those calls reported a change.
bool FPPassManager::runOnModule(Module &M) {
bool Changed = false;

// Accumulate the per-function results so a single change is enough to
// report the module as changed.
for (Function &F : M)
Changed |= runOnFunction(F);

return Changed;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
bool FPPassManager::runOnFunction(Function &F) {
...

// 迭代取出每一个FunctionPass
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
...

{
PassManagerPrettyStackEntry X(FP, F);
TimeRegion PassTimer(getPassTimer(FP));

LocalChanged |= FP->runOnFunction(F);//调用每一个FunctionPass的runOnFunction
}

而Flattening也是继承于FunctionPass,所以runOnFunction也就成了Flattening的入口,从这里进入混淆函数flatten

到了这里,就正式进入了混淆的流程。我们先回顾一下上一篇代码混淆之道(二)中我们抽象出来的控制流扁平化算法:

标识符重命名(解决变量名冲突)——>控制语句展开(全变成if)——>变量声明提前——>控制流压扁

在IR文件中,由于LLVM的IR是SSA(静态单赋值)形式的,标识符重命名和变量声明提前这两步可以省略。至于第二步的控制语句展开,高级语言中的for、while等循环结构在IR中都已经变成了条件判断+br跳转的形式,可以理解为通常所说的if-goto的方式。只有switch结构还保留着,所以只需要将仍然保留的switch也改写成条件判断+br跳转的形式即可。从这里也可以看出在IR层进行混淆的好处,大量的工作在前端生成IR的同时已经由编译器帮我们搞定了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
bool Flattening::flatten(Function *f) {
vector<BasicBlock *> origBB;
BasicBlock *loopEntry;
BasicBlock *loopEnd;
LoadInst *load;
SwitchInst *switchI;
AllocaInst *switchVar;

// SCRAMBLER
char scrambling_key[16];
llvm::cryptoutils->get_bytes(scrambling_key, 16);
// END OF SCRAMBLER

// Lower switch
FunctionPass *lower = createLowerSwitchPass();
lower->runOnFunction(*f);

flatten函数开始的时候执行各种声明,并且调用LowerSwitchPass。这个pass负责将IR文件中目前仍然存在的switch结构转化成if结构。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// Save all original BB
for (Function::iterator i = f->begin(); i != f->end(); ++i) {
BasicBlock *tmp = &*i;
origBB.push_back(tmp);

BasicBlock *bb = &*i;
if (isa<InvokeInst>(bb->getTerminator())) {
return false;
}
}

// Nothing to flatten
if (origBB.size() <= 1) {
return false;
}

// Remove first BB
origBB.erase(origBB.begin());

origBB保存除第一个BasicBlock之外的所有BasicBlock。这里的BasicBlock就是IR层CFG的block,我们的FunctionPass所获得的Function都是以CFG的形式存在的。之所以第一个BasicBlock不保存是因为需要对它进行大刀阔斧的改动,包括分配用于switch判断的变量的内存和跳转入循环。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// If main begin with an if
BranchInst *br = NULL;
if (isa<BranchInst>(insert->getTerminator())) {
br = cast<BranchInst>(insert->getTerminator());
}

if ((br != NULL && br->isConditional()) ||
insert->getTerminator()->getNumSuccessors() > 1) {
BasicBlock::iterator i = insert->back().getIterator();

if (insert->size() > 1) {
i--;
}

BasicBlock *tmpBB = insert->splitBasicBlock(i, "first");
origBB.insert(origBB.begin(), tmpBB);
}

// Remove jump
insert->getTerminator()->eraseFromParent();

将第一个块尾部的跳转内容(如果有的话)提取到一个新的BasicBlock中,并且放入到origBB中。

1
2
3
4
5
6
7
8
9
10
11
// Create switch variable and set as it
switchVar =
new AllocaInst(Type::getInt32Ty(f->getContext()), 0, "switchVar", insert);
new StoreInst(
ConstantInt::get(Type::getInt32Ty(f->getContext()),
llvm::cryptoutils->scramble32(0, scrambling_key)),
switchVar, insert);

// Create main loop
loopEntry = BasicBlock::Create(f->getContext(), "loopEntry", f, insert);
loopEnd = BasicBlock::Create(f->getContext(), "loopEnd", f, insert);

接下来为switch所需要的变量switchVar分配内存,并产生两个新BasicBlock,loopEntry和loopEnd。在loopEntry中通过switch结构来判断接下来执行哪个BasicBlock,所有的BasicBlock最后都会将switchVar的值修改为再下一个BasicBlock对应的值,并且跳转到loopEnd,再由loopEnd重新回到loopEntry。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// Create switch instruction itself and set condition
switchI = SwitchInst::Create(&*(f->begin()), swDefault, 0, loopEntry);
switchI->setCondition(load);

// Remove branch jump from 1st BB and make a jump to the while
f->begin()->getTerminator()->eraseFromParent();

BranchInst::Create(loopEntry, &*(f->begin()));

// Put all BB in the switch
for (vector<BasicBlock *>::iterator b = origBB.begin(); b != origBB.end();
++b) {
BasicBlock *i = *b;
ConstantInt *numCase = NULL;

// Move the BB inside the switch (only visual, no code logic)
i->moveBefore(loopEnd);

// Add case to switch
numCase = cast<ConstantInt>(ConstantInt::get(
switchI->getCondition()->getType(),
llvm::cryptoutils->scramble32(switchI->getNumCases(), scrambling_key)));
switchI->addCase(numCase, i);
}

生成新的switch结构,并且将origBB中的BasicBlock一一放入其中,每个BasicBlock对应的case值则由scramble32根据随机生成的scrambling_key计算得到。

接下来就是对每个BasicBlock的尾巴部分进行修改,这里要分两种情况

第一种是无条件跳转,也就是上一个BasicBlock会直接跳转到下一个BasicBlock,或者说是它只有一个后继节点

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// If it's a non-conditional jump
if (i->getTerminator()->getNumSuccessors() == 1) {
// Get successor and delete terminator
BasicBlock *succ = i->getTerminator()->getSuccessor(0);
i->getTerminator()->eraseFromParent();

// Get next case
numCase = switchI->findCaseDest(succ);

// If next case == default case (switchDefault)
if (numCase == NULL) {
numCase = cast<ConstantInt>(
ConstantInt::get(switchI->getCondition()->getType(),
llvm::cryptoutils->scramble32(
switchI->getNumCases() - 1, scrambling_key)));
}

// Update switchVar and jump to the end of loop
new StoreInst(numCase, load->getPointerOperand(), i);
BranchInst::Create(loopEnd, i);
continue;
}

这种情况比较好办。我们只需要给switchVar赋后继BasicBlock的case number,然后将br loopEnd添加到结尾就好。

第二种情况是有条件跳转,也就是该BasicBlock有多个后继节点

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// If it's a conditional jump
if (i->getTerminator()->getNumSuccessors() == 2) {
// Get next cases
ConstantInt *numCaseTrue =
switchI->findCaseDest(i->getTerminator()->getSuccessor(0));
ConstantInt *numCaseFalse =
switchI->findCaseDest(i->getTerminator()->getSuccessor(1));

// Check if next case == default case (switchDefault)
if (numCaseTrue == NULL) {
numCaseTrue = cast<ConstantInt>(
ConstantInt::get(switchI->getCondition()->getType(),
llvm::cryptoutils->scramble32(
switchI->getNumCases() - 1, scrambling_key)));
}

if (numCaseFalse == NULL) {
numCaseFalse = cast<ConstantInt>(
ConstantInt::get(switchI->getCondition()->getType(),
llvm::cryptoutils->scramble32(
switchI->getNumCases() - 1, scrambling_key)));
}

// Create a SelectInst
BranchInst *br = cast<BranchInst>(i->getTerminator());
SelectInst *sel =
SelectInst::Create(br->getCondition(), numCaseTrue, numCaseFalse, "",
i->getTerminator());

// Erase terminator
i->getTerminator()->eraseFromParent();

// Update switchVar and jump to the end of loop
new StoreInst(sel, load->getPointerOperand(), i);
BranchInst::Create(loopEnd, i);
continue;
}

这里就需要将switchVar的case number赋值改为选择性的select指令。

基本整个流程都遵循了上一篇我们所讲到的控制流扁平化算法。但是这里我们仍然可以看出一些问题。比如对于一些较大的block,混淆过后仍然是巨大的一整块

混淆前

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
; <label>:35:                                     ; preds = %30, %0
store i32* %4, i32** %8, align 8
store i32 3, i32* %2, align 4
%36 = load i32, i32* %4, align 4
%37 = load i32, i32* %3, align 4
%38 = add nsw i32 %36, %37
%39 = load i32, i32* %2, align 4
%40 = xor i32 %39, %38
store i32 %40, i32* %2, align 4
%41 = load i32, i32* %2, align 4
%42 = load i32, i32* %3, align 4
%43 = add nsw i32 %41, %42
store i32 %43, i32* %4, align 4
%44 = load i32, i32* %4, align 4
%45 = add nsw i32 %44, 3
store i32 %45, i32* %4, align 4
%46 = load i32, i32* %3, align 4
%47 = load i32, i32* %4, align 4
%48 = or i32 %46, %47
store i32 %48, i32* %2, align 4
ret i32 0

混淆后

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
; <label>:36:                                     ; preds = %loopEntry
store i32* %4, i32** %8, align 8
store i32 3, i32* %2, align 4
%37 = load i32, i32* %4, align 4
%38 = load i32, i32* %3, align 4
%39 = add nsw i32 %37, %38
%40 = load i32, i32* %2, align 4
%41 = xor i32 %40, %39
store i32 %41, i32* %2, align 4
%42 = load i32, i32* %2, align 4
%43 = load i32, i32* %3, align 4
%44 = add nsw i32 %42, %43
store i32 %44, i32* %4, align 4
%45 = load i32, i32* %4, align 4
%46 = add nsw i32 %45, 3
store i32 %46, i32* %4, align 4
%47 = load i32, i32* %3, align 4
%48 = load i32, i32* %4, align 4
%49 = or i32 %47, %48
store i32 %49, i32* %2, align 4
ret i32 0

如果被混淆的代码大多数都是这种大块大块的顺序语句的话,那混淆的意义就不是很明显。所以对于这种大块,我们需要把它们切割成小块。

可以添加如下代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
std::vector<BasicBlock *> needSplite;
unsigned instSum = 0;
for ( Function::iterator I = std::next(f->begin()); I != f->end(); I++ ) {
instSum += I->size();
}
for ( Function::iterator I = std::next(f->begin()); I != f->end(); I++ ) {
if ( I->size() > instSum / f->size() ) {
needSplite.push_back(&*I);
}
}
for ( auto I = needSplite.begin(); I != needSplite.end(); I++ ) {
BasicBlock::iterator j = (*I)->begin();
for ( unsigned step = 0; step < (*I)->size() / 2; step++ ) {
j++;
}
BasicBlock *next = (*I)->splitBasicBlock(j);
}

首先计算下所有BasicBlock包含指令的数量,对于指令数量大于平均值的,我们对它进行split。

实现对于大块的切割后我们还可以再添加虚假的控制流结构,来增加混淆的复杂性。

将上面第三个for循环的内容稍微更改一下,就可以变成一个非常简陋版本的虚假控制流。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// Variant of the splitting loop: after cutting each selected block in half,
// the branch that ends the first half is replaced by a conditional branch
// whose condition compares a value with itself.
for ( auto I = needSplite.begin(); I != needSplite.end(); I++ ) {
//std::string tw = "split_" + std::to_string(++index);
// Advance j to the midpoint of the block.
BasicBlock::iterator j = (*I)->begin();
for ( unsigned step = 0; step < (*I)->size() / 2; step++ ) {
j++;
}
// next receives the instructions from j onwards.
BasicBlock *next = (*I)->splitBasicBlock(j/*, tw*/);

Instruction *back = &(*I)->back();
if ( isa<BranchInst>(back) ) {
// NOTE(review): isa<> has already established the type, so the dyn_cast<>
// result cannot be null here.
BranchInst *BI = dyn_cast<BranchInst>(back);
BI->eraseFromParent();
// Store the constant 1 into a fresh i32 slot and load it back.
// NOTE(review): the alloca is appended to the split block itself, not to the
// entry block. If that block can execute more than once this presumably
// allocates new stack space each time — confirm and consider hoisting.
AllocaInst *tmpBool = new AllocaInst(Type::getInt32Ty(f->getContext()), 0, "tmp", *I);
new StoreInst(
ConstantInt::get(Type::getInt32Ty(f->getContext()), 1), tmpBool, *I);
LoadInst *loadInst = new LoadInst(tmpBool, "tmp", *I);
// Both operands are the same loaded value, so the equality always holds.
ICmpInst *ICmp = new ICmpInst(**I, ICmpInst::ICMP_EQ, loadInst, loadInst);
// The true edge goes to the second half. The false edge targets whatever
// (*I)->getPrevNode() returns, presumably the block preceding *I in the
// function; it is never taken because the condition is always true.
BranchInst::Create(next, (*I)->getPrevNode(), ICmp, *I);
}
}

这里我们将原本BasicBlock末尾的br指令删除,换成一个icmp+br的形式,只需要保证每次跳转都能跳转到正确的block上,就可以了。

进化后的block变成如下情况

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
; <label>:39:                                     ; preds = %loopEntry
store i32* %4, i32** %8, align 8
store i32 3, i32* %2, align 4
%40 = load i32, i32* %4, align 4
%41 = load i32, i32* %3, align 4
%42 = add nsw i32 %40, %41
%43 = load i32, i32* %2, align 4
%44 = xor i32 %43, %42
store i32 %44, i32* %2, align 4
%45 = load i32, i32* %2, align 4
store i32 %45, i32* %.reg2mem9
%46 = load i32, i32* %3, align 4
store i32 %46, i32* %.reg2mem11
%tmp2 = alloca i32
store i32 1, i32* %tmp2
%tmp3 = load i32, i32* %tmp2
%47 = icmp eq i32 %tmp3, %tmp3
%48 = select i1 %47, i32 1569018260, i32 -2145809682
store i32 %48, i32* %switchVar
br label %loopEnd

; <label>:49: ; preds = %loopEntry
%.reload10 = load volatile i32, i32* %.reg2mem9
%.reload12 = load volatile i32, i32* %.reg2mem11
%50 = add nsw i32 %.reload10, %.reload12
store i32 %50, i32* %4, align 4
%51 = load i32, i32* %4, align 4
%52 = add nsw i32 %51, 3
store i32 %52, i32* %4, align 4
%53 = load i32, i32* %3, align 4
%54 = load i32, i32* %4, align 4
%55 = or i32 %53, %54
store i32 %55, i32* %2, align 4
ret i32 0

上述的修改全部保存到github中 https://github.com/penguin-wwy/Armariris.git

clone到本地,checkout到dev_0分支即可查看,可直接编译运行、调试。