/** ** \file llvmtranslate/translator.cc ** \brief Implementation of llvmtranslate::Translator */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #include // LLVM_VERSION_* #include #include // llvm::verifyFunction #include #include // llvm::sys #include #pragma GCC diagnostic pop #include #include #include "llvm-type-visitor.hh" namespace llvmtranslate { using namespace std::string_literals; namespace { // Shorthands for integer type and pointer to integer type. inline llvm::IntegerType* i32_t(llvm::LLVMContext& ctx) { return llvm::Type::getInt32Ty(ctx); } inline llvm::PointerType* i32p_t(llvm::LLVMContext& ctx) { return llvm::PointerType::get(llvm::Type::getInt32Ty(ctx), 0); } llvm::AllocaInst* create_alloca(llvm::Function* ll_function, llvm::Type* ll_type, const std::string& name) { // Create an IRBuilder starting at the beginning of the current entry // block. LLVM treats allocas as local variables only if they occur at the // beginning of a function. llvm::IRBuilder<> tmp(&ll_function->getEntryBlock(), ll_function->getEntryBlock().begin()); return tmp.CreateAlloca(ll_type, nullptr, name); } // Set default attributes to the functions void set_default_attributes(llvm::Function& the_function, const ast::FunctionDec& e) { the_function.addFnAttr(llvm::Attribute::NoUnwind); // No exceptions in TC if (!e.body_get()) // Inline primitives the_function.addFnAttr(llvm::Attribute::InlineHint); } std::string function_dec_name(const ast::FunctionDec& e) { // Rename "_main" to "tc_main" if (e.name_get() == "_main") return "tc_main"; // Prefix all the primitives with "tc_" if (!e.body_get()) return "tc_" + e.name_get().get(); return e.name_get().get(); } } // namespace Translator::Translator(llvm::Module& module, escaped_map_type&& escaped) : module_{module} , ctx_{module_.getContext()} , builder_{ctx_} , escaped_{std::move(escaped)} , type_visitor_{ctx_} { // The current process triple. auto process_triple = llvm::Triple(llvm::sys::getProcessTriple()); // Set the 32-bit version of the triple. module_.setTargetTriple(process_triple.get32BitArchVariant().str()); } void Translator::operator()(const ast::Ast& e) { translate(e); value_ = nullptr; } llvm::Value* Translator::translate(const ast::Ast& node) { node.accept(*this); return value_; } llvm::Value* Translator::access_var(const ast::Var& e) { if (auto var_ast = dynamic_cast(&e)) { // FIXME DONE: Some code was deleted here. // Chopper la VarDec const ast::VarDec* dec = var_ast->def_get(); if (dec == nullptr) { // This is probably a binder error unreachable(); } // getFunction --> getValueSymbolTable --> lookup auto all_vars = locals_[current_function_]; // Ou si c'est un argument, juste iterer sur les arguments jusqu'a trouver la bonne return all_vars[dec]; } else if (auto arr_ast = dynamic_cast(&e)) { // FIXME DONE: Some code was deleted here. // Here we have the case of an access in an array (var[index]) // Faire un call recursif sur la variable parent afin d'obtenir une reference llvm::Value* mother_val = translate(arr_ast->var_get()); // Utiliser cette ref pour trouver le type associe llvm::Type* mother_ty = llvm_type(*(arr_ast->var_get().type_get())); // Creer une array ref afin de pouvoir stocker les indexs // Ici, l'index en question est index_ de la variable e llvm::Value* index_exp = translate(arr_ast->index_get()); //std::vector = const auto index = llvm::ArrayRef(index_exp); // Et il n'y a plus qu'a cree tout ca en utilisant un llvm::IRBuilderBase::CreateGEP return builder_.CreateGEP(mother_ty, mother_val, index, "subscriptptr_"s + "some_index"); } else if (auto field_ast = dynamic_cast(&e)) { const ast::Var* var = nullptr; // FIXME DONE: Some code was deleted here. // Wait, isn't this going to recurse and create many instances of the thing? // Or it is just a small piece of the path towards the full path to this var? var = &(field_ast->var_get()); auto var_val = translate(*var); const type::Record* record_type = nullptr; // FIXME DONE: Some code was deleted here. record_type = dynamic_cast(&var->type_get()->actual()); if (record_type == nullptr) { // This could be a typing error (we do not have a var pointing to a record?) unreachable(); } misc::symbol field_name; // FIXME DONE: Some code was deleted here. field_name = field_ast->name_get(); int index = -1; // FIXME DONE: Some code was deleted here (Get the index of the field). index = record_type->field_index(field_name); // The GEP instruction provides us with safe pointer arithmetics, // usually used with records or arrays. llvm::Type* record_ltype = nullptr; // FIXME DONE: Some code was deleted here (Get record's corresponding LLVM type). llvm_type(*record_type); record_ltype = type_visitor_.get_record_ltype(record_type); return builder_.CreateStructGEP(record_ltype, var_val, index, "fieldptr_"s + field_name.get()); } else unreachable(); } llvm::Value* Translator::init_array(llvm::Value* count_val, llvm::Value* init_val) { // Cast everything so that it is conform to the signature of init_array // int *init_array(int, int) // We need to separate the pointers and the ints. // LLVM requires a ptrtoint instruction for pointers // and a bitcast for others. auto init_val_cast = init_val->getType()->isPointerTy() ? builder_.CreatePtrToInt(init_val, i32_t(ctx_), "init_array_ptrtoint") : builder_.CreateBitCast(init_val, i32_t(ctx_), "init_array_bitcast"); // Create the init_array function: // First, the arguments (int*, int, int) std::vector arg_type{i32_t(ctx_), init_val_cast->getType()}; // Then, create the FunctionType. auto init_array_ltype = llvm::FunctionType::get(i32p_t(ctx_), arg_type, false); // Get the llvm::Function from the module related to the name and type auto init_array_function = module_.getOrInsertFunction("tc_init_array", init_array_ltype); // Prepare the arguments. std::vector arg_vals{count_val, init_val_cast}; // Create the call. auto init_array_call = builder_.CreateCall(init_array_function, arg_vals, "init_array_call"); // Cast the result of the call in the desired type. return builder_.CreateBitCast(init_array_call, init_val->getType()->getPointerTo(), "init_array_call_cast"); } llvm::Type* Translator::llvm_type(const type::Type& type) { type_visitor_(type); return type_visitor_.llvm_type_get(); } llvm::FunctionType* Translator::llvm_function_type(const type::Function& function_type) { // Prepare the arguments std::vector args_types; // First, if there are any escaped vars, create ptr arguments for it // (Lambda lifting) if (auto escapes_it = escaped_.find(&function_type); escapes_it != std::end(escaped_)) { auto& escapes = escapes_it->second; args_types.reserve(escapes.size() + function_type.formals_get().fields_get().size()); for (const auto dec : escapes) { llvm::Type* var_ltype = nullptr; // FIXME DONE: Some code was deleted here (Get the llvm type of the VarDec). var_ltype = llvm_type(*dec->type_get()); args_types.emplace_back(llvm::PointerType::getUnqual(var_ltype)); } } else args_types.reserve(function_type.formals_get().fields_get().size()); // Then, the actual arguments for (const auto& field : function_type.formals_get()) args_types.emplace_back(llvm_type(field.type_get())); llvm::Type* result_ltype = nullptr; // FIXME DONE: Some code was deleted here (If the result is void typed, we assign llvm void type to result_ltype). result_ltype = dynamic_cast(&function_type.result_get().actual()) != nullptr ? llvm::Type::getVoidTy(ctx_) : llvm_type(function_type.result_get()); return llvm::FunctionType::get(result_ltype, args_types, false); } void Translator::operator()(const ast::SimpleVar& e) { // Void var types are actually Ints represented by a 0 // FIXME DONE: Some code was deleted here. value_ = access_var(e); } void Translator::operator()(const ast::FieldVar& e) { // FIXME DONE: Some code was deleted here. value_ = access_var(e); } void Translator::operator()(const ast::SubscriptVar& e) { // FIXME DONE: Some code was deleted here. value_ = access_var(e); } void Translator::operator()(const ast::NilExp& e) { // FIXME DONE: Some code was deleted here (Get the record_type of the Nil type, and create a null pointer). const llvm::Type* pointer_type = llvm_type(*e.type_get()); value_ = llvm::ConstantPointerNull::get(pointer_type->getPointerTo()); } void Translator::operator()(const ast::IntExp& e) { // FIXME DONE: Some code was deleted here (Integers in Tiger are all 32bit signed). value_ = builder_.getInt32(e.value_get()); } void Translator::operator()(const ast::StringExp& e) { // FIXME DONE: Some code was deleted here (Strings are translated as `i8*` values, like C's `char*`). value_ = builder_.CreateGlobalStringPtr(e.value_get(), "str_exp"); } void Translator::operator()(const ast::RecordExp& e) { // Get the record type const type::Record* record_type = nullptr; // FIXME DONE: Some code was deleted here. record_type = dynamic_cast(&e.type_get()->actual()); // Type the record and use get_record_ltype() to get its LLVM type llvm_type(*record_type); auto struct_ltype = type_visitor_.get_record_ltype(record_type); // The size of the structure and cast it to int auto sizeof_val = llvm::ConstantExpr::getSizeOf(struct_ltype); sizeof_val = llvm::ConstantExpr::getTruncOrBitCast(sizeof_val, i32_t(ctx_)); // Generate the instruction calling Malloc auto malloc_val = builder_.CreateMalloc( i32_t(ctx_), struct_ltype, sizeof_val, nullptr, nullptr, "malloccall"); // Init the fields // FIXME DONE: Some code was deleted here. auto& fields = e.fields_get(); assertion(fields.size() == record_type->fields_get().size()); for (const auto field : fields) { const auto field_value = translate(field->init_get()); const auto field_index = record_type->field_index(field->name_get()); assertion(field_index >= 0); const auto field_llvm_address = builder_.CreateStructGEP( struct_ltype, malloc_val, field_index, "field_" + field->name_get().get() ); value_ = builder_.CreateStore(field_value, field_llvm_address); } value_ = malloc_val; } void Translator::operator()(const ast::OpExp& e) { // FIXME DONE: Some code was deleted here. // The comparison instructions returns an i1, and we need an i32, since everything // is an i32 in Tiger. Use a zero-extension to avoid this. const auto left_operand = \ get_dereferenced(translate(e.left_get()), e.left_get().type_get()); const auto right_operand = \ get_dereferenced(translate(e.right_get()), e.right_get().type_get()); switch (e.oper_get()) { case ast::OpExp::Oper::add: value_ = builder_.CreateAdd(left_operand, right_operand, "op_add"); break; case ast::OpExp::Oper::sub: value_ = builder_.CreateSub(left_operand, right_operand, "op_sub"); break; case ast::OpExp::Oper::mul: value_ = builder_.CreateMul(left_operand, right_operand, "op_mul"); break; case ast::OpExp::Oper::div: value_ = builder_.CreateSDiv(left_operand, right_operand, "op_div"); break; case ast::OpExp::Oper::eq: value_ = builder_.CreateICmpEQ(left_operand, right_operand, "op_eq"); break; case ast::OpExp::Oper::ne: value_ = builder_.CreateICmpNE(left_operand, right_operand, "op_ne"); break; case ast::OpExp::Oper::gt: value_ = builder_.CreateICmpSGT(left_operand, right_operand, "op_gt"); break; case ast::OpExp::Oper::ge: value_ = builder_.CreateICmpSGE(left_operand, right_operand, "op_ge"); break; case ast::OpExp::Oper::lt: value_ = builder_.CreateICmpSLT(left_operand, right_operand, "op_lt"); break; case ast::OpExp::Oper::le: value_ = builder_.CreateICmpSLE(left_operand, right_operand, "op_le"); break; } value_ = builder_.CreateZExtOrTrunc(value_, i32_t(ctx_), "op_zext"); } void Translator::operator()(const ast::SeqExp& e) { // An empty SeqExp is an empty expression, so we should return an int // containing 0, since its type is void. // FIXME DONE: Some code was deleted here. if (e.exps_get().empty()) { value_ = get_void_value(); return; } for (const auto& exp : e.exps_get()) { translate(*exp); if (dynamic_cast(exp) != nullptr) { value_ = get_dereferenced(value_, exp->type_get()); } } } void Translator::operator()(const ast::AssignExp& e) { // FIXME DONE: Some code was deleted here. llvm::Value* value = \ get_dereferenced(translate(e.exp_get()), e.exp_get().type_get()); llvm::Value* variable = translate(e.var_get()); value_ = builder_.CreateStore(value, variable); } void Translator::operator()(const ast::IfExp& e) { // FIXME: Some code was deleted here (IfExps are handled in a similar way to Kaleidoscope (see LangImpl5.html)). llvm::Value* cond = translate(e.test_get()); auto zero_val = llvm::ConstantInt::getSigned(cond->getType(), 0); // The condition may not be correct we are checking that this is different than 0 cond = builder_.CreateICmpNE(cond, zero_val, "ifcond"); // We create the blocks auto then_block = llvm::BasicBlock::Create(ctx_, "then_body", current_function_); auto else_block = llvm::BasicBlock::Create(ctx_, "else_body", current_function_); auto after_if = llvm::BasicBlock::Create(ctx_, "after_if", current_function_); // Explicitely create the if statement builder_.CreateCondBr(cond, then_block, else_block); // We now tell the builder to insert newly added code to this block builder_.SetInsertPoint(then_block); llvm::Value* then_body = translate(e.thenclause_get()); // Create an unconditional jump to the end of the if statement builder_.CreateBr(after_if); // Very akward case where we have an if inside another if // Apparently this is done to not disturb the PHI node then_block = builder_.GetInsertBlock(); // We add the else block to the current function // Function is private so we directly add the block to the function // current_function_->getBasicBlockList().push_back(else_block); // We are now adding stuff in the else block builder_.SetInsertPoint(else_block); llvm::Value* else_body = translate(e.elseclause_get()); builder_.CreateBr(after_if); else_block = builder_.GetInsertBlock(); // Finally we add the remaining block, the one to be executer after the if // current_function_->getBasicBlockList().push_back(after_if); builder_.SetInsertPoint(after_if); if (dynamic_cast(e.type_get()) == nullptr) { // And now for the star of the show: The PHI node // This is a value that can take multiple values depending on the path llvm::PHINode* phi = builder_.CreatePHI(llvm_type(*e.elseclause_get().type_get()), 2, "phinode"); phi->addIncoming(then_body, then_block); phi->addIncoming(else_body, else_block); value_ = phi; } } void Translator::operator()(const ast::WhileExp& e) { // Bb containing the test and the branching auto test_bb = llvm::BasicBlock::Create(ctx_, "test", current_function_); auto body_bb = llvm::BasicBlock::Create(ctx_, "body", current_function_); auto after_bb = llvm::BasicBlock::Create(ctx_, "afterloop", current_function_); // Save the after block for breaks loop_end_[&e] = after_bb; // Explicitly fall through from the current block builder_.CreateBr(test_bb); // Start inside the test BasicBlock builder_.SetInsertPoint(test_bb); auto cond_val = translate(e.test_get()); auto zero_val = llvm::ConstantInt::getSigned(cond_val->getType(), 0); auto cmp_val = builder_.CreateICmpNE(cond_val, zero_val, "loopcond"); // Create the branching builder_.CreateCondBr(cmp_val, body_bb, after_bb); // Translate the body inside the body BasicBlock builder_.SetInsertPoint(body_bb); // Don't store the return value, is should be void. translate(e.body_get()); // Go back to the Test BasicBlock builder_.CreateBr(test_bb); // Continue after the loop BasicBlock builder_.SetInsertPoint(after_bb); } void Translator::operator()(const ast::BreakExp& e) { // FIXME DONE: Some code was deleted here. const auto while_node = dynamic_cast(e.def_get()); precondition(loop_end_.contains(while_node)); value_ = builder_.CreateBr(loop_end_.at(while_node)); } void Translator::operator()(const ast::ArrayExp& e) { // Translate the number of elements, // fill the array with the default value, then // return the pointer to the allocated zone. // FIXME DONE: Some code was deleted here (Use `init_array`). llvm::Value* size = translate(e.size_get()); llvm::Value* init_value = translate(e.init_get()); value_ = init_array(size, init_value); } void Translator::operator()(const ast::CastExp& e) { auto exp_val = translate(e.exp_get()); llvm::Type* ltype = nullptr; // FIXME DONE: Some code was deleted here (Destination llvm type). ltype = llvm_type(*e.type_get()); value_ = builder_.CreateBitCast(exp_val, ltype, "cast_exp"); } void Translator::operator()(const ast::FunctionChunk& e) { for (const auto& fdec : e) visit_function_dec_header(*fdec); for (const auto& fdec : e) // There is nothing to translate for primitives. if (fdec->body_get()) visit_function_dec_body(*fdec); } void Translator::visit_function_dec_header(const ast::FunctionDec& e) { bool is_main = e.name_get() == "_main"; bool is_primitive = e.body_get() == nullptr; auto name = function_dec_name(e); const type::Type* node_type = nullptr; // FIXME DONE: Some code was deleted here. node_type = &e.type_get()->actual(); auto& function_type = static_cast(*node_type); auto function_ltype = llvm_function_type(function_type); // Main and primitives have External linkage. // Other Tiger functions are treated as "static" functions in C. auto linkage = is_main || is_primitive ? llvm::Function::ExternalLinkage : llvm::Function::InternalLinkage; auto the_function = llvm::Function::Create(function_ltype, linkage, name, &module_); set_default_attributes(*the_function, e); auto& escaped = escaped_[&function_type]; // Name each argument of the function for (auto arg_it = the_function->arg_begin(); arg_it != the_function->arg_end(); ++arg_it) { auto i = std::distance(the_function->arg_begin(), arg_it); auto var = escaped.size() && static_cast(i) < escaped.size() ? *std::next(escaped_[&function_type].begin(), i) : e.formals_get()[i - escaped.size()]; arg_it->setName(var->name_get().get()); } } void Translator::visit_function_dec_body(const ast::FunctionDec& e) { auto the_function = module_.getFunction(function_dec_name(e)); // Save the old function in case a nested function occurs. auto old_insert_point = builder_.saveIP(); auto old_function = current_function_; current_function_ = the_function; // Create a new basic block to start the function. auto bb = llvm::BasicBlock::Create(ctx_, "entry_"s + e.name_get().get(), the_function); builder_.SetInsertPoint(bb); const type::Type* node_type = nullptr; // FIXME DONE: Some code was deleted here. node_type = &e.type_get()->actual(); auto& function_type = static_cast(*node_type); auto& escaped = escaped_[&function_type]; auto& formals = e.formals_get(); auto arg_it = the_function->arg_begin(); for (const auto var : escaped) { locals_[current_function_][var] = &*arg_it; ++arg_it; } // FIXME DONE: Some code was deleted here (Create alloca instructions for each variable). for (size_t i = 0; i < formals.decs_get().size(); i++) { const ast::VarDec* const parameter = formals.decs_get().at(i); locals_[current_function_][parameter] = arg_it + i; } translate(*e.body_get()); // FIXME DONE: Some code was deleted here (Create a return instruction). if (dynamic_cast(&function_type.result_get().actual()) != nullptr) { value_ = builder_.CreateRetVoid(); } else { if (dynamic_cast(e.body_get()) != nullptr) { value_ = get_dereferenced(value_, e.body_get()->type_get()); } value_ = builder_.CreateRet(value_); } // Validate the generated code, checking for consistency. llvm::verifyFunction(*the_function); // Restore the context of the old function. current_function_ = old_function; builder_.restoreIP(old_insert_point); } void Translator::operator()(const ast::CallExp& e) { // Look up the name in the global module table. // If it's a primitive, rename the call to tc_name. // // Then, add the escaped variables and the rest of the arguments to the // list of arguments, and return the correct value. // FIXME DONE: Some code was deleted here. const auto function_type = dynamic_cast(e.def_get()->type_get()); precondition(function_type != nullptr); std::string function_name = e.name_get().get(); if (e.def_get()->body_get() == nullptr) { // then the call references a primitive function_name = "tc_" + function_name; } llvm::Function* function = module_.getFunction(function_name); std::vector args; std::string twine = "call_" + e.name_get().get(); assertion(function != nullptr); for (auto& var : escaped_[function_type]) { args.push_back(translate(*var)); } for (auto& parameter : e.args_get()) { llvm::Value* variable = translate(*parameter); llvm::Value* value = dynamic_cast(parameter) != nullptr ? get_dereferenced(variable, parameter->type_get()) : variable; args.push_back(value); } if (dynamic_cast(&function_type->result_get().actual()) != nullptr) { twine = ""; } value_ = builder_.CreateCall(function, args, twine); } void Translator::operator()(const ast::VarDec& e) { // Void var types are actually Ints represented by a 0 // FIXME DONE: Some code was deleted here. if (const auto existing_ref = declaration_in_current_scope(e)) { value_ = existing_ref; return; } llvm::Type* variable_type = llvm_type(*e.type_get()); llvm::Value* init_value = translate(*e.init_get()); llvm::Value* variable = create_alloca(current_function_, variable_type, "var_" + e.name_get().get()); locals_[current_function_][&e] = variable; value_ = builder_.CreateStore(init_value, variable); } llvm::Value* Translator::declaration_in_current_scope(const ast::VarDec& e) const { if (!locals_.contains(current_function_)) { return nullptr; } const auto current_scope = locals_.at(current_function_); return current_scope.contains(&e) ? current_scope.at(&e) : nullptr; } } // namespace llvmtranslate