// collect dispatch function args type std::vector<Type *> FuncTy_args; // param: targetfunc_id_value FuncTy_args.push_back(Type::getInt64Ty(Mod->getContext()));
// get dispatch function type FunctionType *FuncTy = FunctionType::get( /*Result=*/Type::getVoidTy(this->Mod->getContext()), // returning void /*Params=*/FuncTy_args, /*isVarArg=*/false);
// Store params Value *target_id_value; for (auto arg = func->arg_begin(); arg != func->arg_end(); arg++) { Value *tmparg = &*arg; if (arg == func->arg_begin()) { // targetfunc_id_value Value *paramPtr = IRBentryBB.CreateAlloca(Type::getInt64Ty(Mod->getContext())); IRBentryBB.CreateStore(tmparg, paramPtr); target_id_value = IRBentryBB.CreateLoad(paramPtr); } }
// create condition basicblock BasicBlock *conBBL = BasicBlock::Create(func->getContext(), "conBBL", func); IRBentryBB.CreateBr(conBBL); // traverse Functions and put them in the switch this->callinst_handler_curr_idx = 0;
// if return not void, alloca a memory if (!F->getReturnType()->isVoidTy()) { curr_data_offset += modDataLayout->getTypeAllocSize(F->getReturnType()); }
for (auto ins = bbl->begin(); ins != bbl->end(); ins++) {
Instruction *inst = dyn_cast<Instruction>(ins);
for (unsigned idx = 0; idx < inst->getNumOperands(); idx++) { // nglog: 假如指令的操作數中有ConstantExpr, 要把它「拉出來」變成一條獨立的指令 if (ConstantExpr *Op = dyn_cast<ConstantExpr>(inst->getOperand(idx))) { // we found a ConstantExpr // convert ConstantExpr to a equal instruction Instruction *const_inst = Op->getAsInstruction(); const_inst->insertBefore(inst); // There is a problem, PHINode must at first instruction of a basicblock, unpack all constantExpr is a potential problem // God bless there is not a constantExpr in PHINode
// replace ConstantExpr to a value in inst inst->setOperand(idx, const_inst);
#define GET_PACK_VALUE(value) (packValue(value, &value_map)) // pack type to a vector(2) // {size, TypeID} std::vector<uint8_t> type_to_hex(Type *type) { std::vector<uint8_t> res; res.push_back(modDataLayout->getTypeAllocSize(type)); res.push_back(type->getTypeID()); return res; }
// pack a value std::vector<uint8_t> packValue(Value *value, std::map<Value *, int> *value_map) { std::vector<uint8_t> res; std::vector<uint8_t> packed; std::vector<uint8_t> packType = type_to_hex(value->getType()); if (ConstantData *CD = dyn_cast<ConstantData>(value)) { packed = pack_const_value(value); } else { // if value not in map if (value_map->find(value) == value_map->end()) { // check value is not a GlobalVariable if (GlobalVariable *gv = dyn_cast<GlobalVariable>(value)) { // is a GlobalVariable and not in value_map // put it into value_map insert_to_value_map(value_map, value, curr_data_offset);
// also put it into gv_value_map gv_value_map.insert(pair<GlobalVariable *, int>(gv, curr_data_offset));
#ifdef IS_INLINE_FUNC __inline__ __attribute__((always_inline)) #endif voidalloca_handler(){ // size and type of pointer is useless uint8_t var_size = get_byte_code(); uint8_t var_type = get_byte_code();
// get pointer var offset uint64_t var_offset = unpack_code(POINTER_SIZE);
// get alloca area offset uint64_t area_offset = unpack_code(POINTER_SIZE);
// store area virtual address to var // set_var(var_offset, POINTER_SIZE, data_seg_addr+area_offset); pack_store_addr(data_seg_addr+var_offset, data_seg_addr+area_offset, var_size); }
// get indices // but only consider last indice std::vector<Value *> indices; for (auto curr_idx = inst->idx_begin(); curr_idx != inst->idx_end(); curr_idx++) { indices.push_back(*curr_idx); }
// GEP type // {0, 0}: structure value is offset // {x, x}: array, value is offset Type *srcType = inst->getSourceElementType(); std::vector<uint8_t> gep_type; std::vector<uint8_t> packed_value; if (dyn_cast<StructType>(srcType)) { // is struct type StructType *st = dyn_cast<StructType>(srcType); gep_type = {0, 0}; ConstantInt *CI = dyn_cast<ConstantInt>(indices[indices.size() - 1]); // last indice int element_idx = CI->getSExtValue(); // const value to int int curr_element_offset = 0; for (int i = 0; i < element_idx; i++) { // calc the offset between curr_element and struct_begin curr_element_offset += modDataLayout->getTypeAllocSize(st->getElementType(i)); }
// check if this callsite return a void std::vector<uint8_t> packed_res; if (inst->getType() != Type::getVoidTy(this->Mod->getContext())) { // return a value int res_offset = curr_data_offset; insert_to_value_map(&value_map, inst, curr_data_offset); int res_size = modDataLayout->getTypeAllocSize(inst->getType()); curr_data_offset += res_size;
// firstly, we need to unpack function args std::vector<Value *> target_func_args; for (unsigned idx = 0; idx < inst->getNumArgOperands(); idx++) { Value *currarg = inst->getArgOperand(idx); // nglog: 常量直接保存在target_func_args // if value is a constant, use it directly if (ConstantData *CD = dyn_cast<ConstantData>(currarg)) { target_func_args.push_back(currarg); continue; }
// if return not void, store it to gv_data_seg if (inst->getType() != Type::getVoidTy(this->Mod->getContext())) { unsigned result_value_offset = value_map[inst];
// load value from gv_data_seg ConstantInt *Zero = ConstantInt::get(Type::getInt64Ty(Mod->getContext()), 0); Value *offset_value = ConstantInt::get(Type::getInt64Ty(Mod->getContext()), result_value_offset); Value *gepinst = IRBcallFunction.CreateGEP(gv_data_seg, {Zero, offset_value}, "");
// convert gep from i8* to value->getType() * PointerType *target_ptr_type = PointerType::get(resultValue->getType(), cast<PointerType>(gepinst->getType())->getAddressSpace()); Value *ptr = IRBcallFunction.CreatePointerCast(gepinst, target_ptr_type);
// store IRBcallFunction.CreateStore(resultValue, ptr); }
// Create Return IRBcallFunction.CreateRetVoid();
最後的賦值是為了下一輪的handle_callinst()做準備。
1 2 3 4 5 6 7 8
// compare and jmp BasicBlock *falseconBBL = BasicBlock::Create(Mod->getContext(), "falseconBBL", this->callinst_handler);
// nglog: 構建類似switch結構來進行函數分發 Value *currfunc_id = ConstantInt::get(Type::getInt64Ty(Mod->getContext()), curr_func_id); Value *condition = IRBcon.CreateICmpEQ(this->targetfunc_id, currfunc_id); IRBcon.CreateCondBr(condition, callFunction, falseconBBL); this->callinst_handler_conBBL = falseconBBL;
Module *llvm_parse_bitcode_from_string() { binary_ir.resize(binary_ir_length); int binary_ir_idx = 0; for (auto s : binary_ir_vector) { for (int i = 0; i < s.size(); i++) { binary_ir[binary_ir_idx++] = s[i]; } }
for (const Argument &I : fun->args()) if (VMap.count(&I) == 0) { // Is this argument preserved? DestI->setName(I.getName()); // Copy the name over... VMap[&I] = &*DestI++; // Add mapping to VMap }
CloneFunctionInto(NewF, fun, VMap, true, returns); // nglog: 每個被vmp的函數都會對應一個vm_interpreter函數 // 因此會存在多個vm_interpreter(), 重命名成vm_interpreter_<func_name>來防止沖突 // set a new name NewF->setName(fun->getName() + "_" + F->getName());
// remove all function of interpreter_module while (true) { bool flag = true; for (auto Func = interpreter_module->begin(), Funcend = interpreter_module->end(); Func != Funcend; ++Func) {
Function *fun = dyn_cast<Function>(&*Func);
if (fun->use_empty()) { errs() << "[*] Removing function: " << fun->getName().str() << "\n"; flag = false; fun->eraseFromParent(); break; } } if (flag) break; }
// collect all callinst args std::vector<pair<Value *, int>> args_map; int arg_offset = 0; // if return not void if (!F->getReturnType()->isVoidTy()) { arg_offset += modDataLayout->getTypeAllocSize(F->getReturnType()); }
for (auto arg = F->arg_begin(); arg != F->arg_end(); arg++) { Value *tmparg = &*arg;
Value *paramPtr = irbuilder.CreateAlloca(tmparg->getType()); irbuilder.CreateStore(tmparg, paramPtr); Value *currvalue = irbuilder.CreateLoad(paramPtr);
// GEP get ptr point to offset ConstantInt *Zero = ConstantInt::get(Type::getInt64Ty(F->getContext()), 0); Value *const_curr_value_offset = ConstantInt::get(Type::getInt64Ty(F->getContext()), offset); Value *gepinst = irbuilder.CreateGEP(gv_data_seg, {Zero, const_curr_value_offset}, "");
// cast gep_ptr to value->type PointerType *target_ptr_type = PointerType::get(value->getType(), cast<PointerType>(gepinst->getType())->getAddressSpace()); Value *ptr = irbuilder.CreatePointerCast(gepinst, target_ptr_type);
// store value to data_seg+offset irbuilder.CreateStore(value, ptr); }
// store gv_data_seg and gv_code_seg to data_seg_addr, code_seg_addr Value *data_seg_ptr2int = irbuilder.CreatePtrToInt(gv_data_seg, Type::getInt64Ty(Mod->getContext())); irbuilder.CreateStore(data_seg_ptr2int, data_seg_addr); Value *code_seg_ptr2int = irbuilder.CreatePtrToInt(gv_code_seg, Type::getInt64Ty(Mod->getContext())); irbuilder.CreateStore(code_seg_ptr2int, code_seg_addr);
if (!F->getReturnType()->isVoidTy()) { // unsigned return_value_size = modDataLayout->getTypeAllocSize(F->getReturnType()); // load return value from data_seg
// GEP get ptr point to offset ConstantInt *Zero = ConstantInt::get(Type::getInt64Ty(F->getContext()), 0); // nglog: 在IR指令翻譯的開頭curr_data_offset預留的位置, 就是存放返回值 Value *gepinst = irbuilder.CreateGEP(gv_data_seg, {Zero, Zero}, ""); // cast gep_ptr to value->type PointerType *target_ptr_type = PointerType::get(F->getReturnType(), cast<PointerType>(gepinst->getType())->getAddressSpace()); Value *ptr = irbuilder.CreatePointerCast(gepinst, target_ptr_type); // load return value Value *retval = irbuilder.CreateLoad(ptr);
Assertion `(NewFunc->getParent() == nullptr || NewFunc->getParent() == OldFunc->getParent()) && "Expected NewFunc to have the same parent, or no parent"' failed.
Global is external, but doesn't have external or weak linkage! ptr @_Z7startVMv__Z3addii LLVM ERROR: Broken module found, compilation aborted!
意思是:「這個全域符號(GlobalVariable / Function / GlobalAlias)在語義上是外部可見的(external),但是卻沒有給它外部連結屬性(ExternalLinkage / WeakLinkage / ExternalWeakLinkage …),這種組合是非法的」
雖然可以將Linkage改成ExternalLinkage來解決上面這個問題,但之後在鏈接階段會報錯:
1 2 3 4 5
/usr/bin/ld: /tmp/output-c589ca.o: in function `add(int, int)': test.cpp:(.text+0x21): undefined reference to `_Z7startVMv__Z3addii' /usr/bin/ld: /tmp/output-c589ca.o: in function `add2(int, int, int)': test.cpp:(.text+0x6f): undefined reference to `_Z7startVMv__Z4add2iii' clang++: error: linker command failed with exit code 1 (use -v to see invocation)
voidalloca_handler() { // size and type of pointer is useless uint8_t var_size = get_byte_code(); uint8_t var_type = get_byte_code();
// get pointer var offset uint64_t var_offset = unpack_code(POINTER_SIZE);
// get alloca area offset uint64_t area_offset = unpack_code(POINTER_SIZE);
// store area virtual address to var // set_var(var_offset, POINTER_SIZE, data_seg_addr+area_offset); pack_store_addr(data_seg_addr + var_offset, data_seg_addr + area_offset, var_size); }
假如只保存area_offset會有什麼問題?在不涉及指針操作時不會有太大問題。
而一旦遇到像my_memcpy()這樣操作指針的函數時,就會出現問題。
1 2 3 4 5 6 7 8 9 10 11 12
// Copy the int that `b` points to into the int that `a` points to.
// Both pointers are dereferenced unconditionally, so neither may be null.
void my_memcpy(int *a, int *b) {
  *a = *b;
}
// Test case carrying the "ngvmp" annotation: passes the addresses of two
// locals to my_memcpy() and returns `a` after the call.
__attribute((__annotate__(("ngvmp"))))
int test5() {
  int a = 0;
  int b = 10;
  my_memcpy(&a, &b);  // writes *b into *a through the pointers
  return a;
}
// if value not in map if (value_map->find(value) == value_map->end()) { // check value is not a GlobalVariable if (GlobalVariable *gv = dyn_cast<GlobalVariable>(value)) { // is a GlobalVariable and not in value_map // put it into value_map insert_to_value_map(value_map, value, curr_data_offset);
// also put it into gv_value_map gv_value_map.insert(pair<GlobalVariable *, int>(gv, curr_data_offset));
// firstly, we need to unpack function args std::vector<Value *> target_func_args; for (unsigned idx = 0; idx < inst->getNumArgOperands(); idx++) { Value *currarg = inst->getArgOperand(idx); // nglog: 常量直接保存在target_func_args // if value is a constant, use it directly if (ConstantData *CD = dyn_cast<ConstantData>(currarg)) { target_func_args.push_back(currarg); continue; } // 假如currarg是全局變量, 則它不會在value_map中, 繼而這裡取的curroffset是錯的 unsigned curroffset = value_map[currarg]; // ... }