tan  0.0.1
code_generator.cpp
1 #include "codegen/code_generator.h"
2 #include "ast/ast_base.h"
3 #include "ast/type.h"
4 #include "ast/expr.h"
5 #include "ast/stmt.h"
6 #include "ast/decl.h"
7 #include "ast/intrinsic.h"
8 #include "ast/package.h"
9 #include "ast/default_value.h"
10 #include "tan/tan.h"
11 
12 #include <llvm/IR/Value.h>
13 #include <llvm/IR/LLVMContext.h>
14 #include <llvm/IR/IRBuilder.h>
15 #include <llvm/ADT/APFloat.h>
16 #include <llvm/IR/Function.h>
17 #include <llvm/ADT/APInt.h>
18 #include "llvm/ADT/StringRef.h"
19 #include <llvm/IR/Verifier.h>
20 #include <llvm/IR/Instruction.h>
21 #include <llvm/IR/DerivedTypes.h>
22 #include <llvm/MC/TargetRegistry.h>
23 #include <llvm/IR/Module.h>
24 #include <llvm/Support/FileSystem.h>
25 #include <llvm/IR/LegacyPassManager.h>
26 #include <llvm/IR/DataLayout.h>
27 #include <llvm/ExecutionEngine/SectionMemoryManager.h>
28 #include <llvm/IR/GlobalVariable.h>
29 #include <llvm/IR/GlobalValue.h>
30 #include <llvm/Transforms/IPO/PassManagerBuilder.h>
31 #include <llvm/Transforms/Scalar.h>
32 #include <llvm/Transforms/Scalar/GVN.h>
33 #include <llvm/IR/DIBuilder.h>
34 #include <llvm/IR/DebugInfo.h>
35 #include <llvm/Support/CodeGen.h>
36 #include <clang/Basic/TargetOptions.h>
37 #include <clang/Frontend/CompilerInvocation.h>
38 #include <llvm/LinkAllPasses.h>
39 #include <llvm/Option/OptTable.h>
40 #include <llvm/Support/ManagedStatic.h>
41 #include <llvm/Support/Signals.h>
42 #include <llvm/Support/TimeProfiler.h>
43 #include <llvm/Support/raw_ostream.h>
44 #include <llvm/Target/TargetMachine.h>
45 #include <llvm/Analysis/TargetTransformInfo.h>
46 #include <clang/Basic/DiagnosticOptions.h>
47 #include <llvm/ADT/StringSwitch.h>
48 #include <llvm/MC/MCAsmBackend.h>
49 #include <llvm/MC/MCAsmInfo.h>
50 #include <llvm/MC/MCCodeEmitter.h>
51 #include <llvm/MC/MCContext.h>
52 #include <llvm/MC/MCInstrInfo.h>
53 #include <llvm/MC/MCObjectFileInfo.h>
54 #include <llvm/MC/MCObjectWriter.h>
55 #include <llvm/MC/MCParser/MCAsmParser.h>
56 #include <llvm/MC/MCParser/MCTargetAsmParser.h>
57 #include <llvm/MC/MCRegisterInfo.h>
58 #include <llvm/MC/MCSectionMachO.h>
59 #include <llvm/MC/MCStreamer.h>
60 #include <llvm/MC/MCSubtargetInfo.h>
61 #include <llvm/Support/FormattedStream.h>
62 #include <llvm/Support/Process.h>
63 #include <llvm/Support/Regex.h>
64 #include <llvm/Support/StringSaver.h>
65 #include <llvm/ADT/Triple.h>
66 #include <llvm/Object/Archive.h>
67 #include <llvm/Object/IRObjectFile.h>
68 #include <llvm/Object/MachO.h>
69 #include <llvm/Support/ConvertUTF.h>
70 #include <llvm/Support/Errc.h>
71 #include <clang/CodeGen/CodeGenAction.h>
72 #include <lld/Common/Driver.h>
73 #include <llvm/Passes/PassBuilder.h>
74 
75 using llvm::AllocaInst;
76 using llvm::APFloat;
77 using llvm::APInt;
78 using llvm::BasicBlock;
79 using llvm::CGSCCAnalysisManager;
80 using llvm::Constant;
81 using llvm::ConstantArray;
82 using llvm::ConstantFP;
83 using llvm::ConstantInt;
84 using llvm::ConstantPointerNull;
85 using llvm::ConstantStruct;
86 using llvm::DataLayout;
87 using llvm::DebugLoc;
88 using llvm::DIBuilder;
89 using llvm::DICompileUnit;
90 using llvm::DIFile;
91 using llvm::DILocation;
92 using llvm::DINode;
93 using llvm::DIScope;
94 using llvm::DISubprogram;
95 using llvm::DISubroutineType;
96 using llvm::DIType;
97 using llvm::Expected;
98 using llvm::Function;
99 using llvm::FunctionAnalysisManager;
100 using llvm::GlobalValue;
101 using llvm::GlobalVariable;
102 using llvm::IRBuilder;
103 using llvm::LLVMContext;
104 using llvm::LoopAnalysisManager;
105 using llvm::MDNode;
106 using llvm::Metadata;
107 using llvm::Module;
108 using llvm::ModuleAnalysisManager;
109 using llvm::ModulePassManager;
110 using llvm::PassBuilder;
111 using llvm::PassManagerBuilder;
112 using llvm::PHINode;
113 using llvm::SectionMemoryManager;
114 using llvm::StringRef;
115 using llvm::TargetMachine;
116 using llvm::Triple;
117 using llvm::Value;
118 using llvm::verifyFunction;
119 using llvm::legacy::FunctionPassManager;
120 using llvm::legacy::PassManager;
121 
/// Line reported by the source of AST node pointer `p` for the node's start().
/// The argument is parenthesized so that any pointer-valued expression
/// (e.g. `&node` or `cond ? a : b`) expands with the intended precedence.
#define AST_LINENO(p) ((p)->src()->get_line((p)->start()))
/// Column reported by the source of AST node pointer `p` for the node's start().
/// Parenthesized for the same reason as AST_LINENO.
#define AST_COL(p) ((p)->src()->get_col((p)->start()))
124 
125 namespace tanlang {
126 
127 void CodeGenerator::init(Package *package) {
128  _llvm_ctx = new LLVMContext();
129  _builder = new IRBuilder<>(*_llvm_ctx);
130  _module = new Module(package->get_name(), *_llvm_ctx);
131  _module->setDataLayout(_target_machine->createDataLayout());
132  _module->setTargetTriple(_target_machine->getTargetTriple().str());
133 
134  /// add_ctx the current debug info version into the module
135  _module->addModuleFlag(Module::Warning, "Dwarf Version", llvm::dwarf::DWARF_VERSION);
136  _module->addModuleFlag(Module::Warning, "Debug Info Version", llvm::DEBUG_METADATA_VERSION);
137 
138  /// debug related
139  _di_builder = new DIBuilder(*_module);
140 
141  auto *di_package = _di_builder->createFile("<package-" + package->get_name() + ">", ".");
142 
143  // FIXME: isOptimized flag
144  auto *cu = _di_builder->createCompileUnit(llvm::dwarf::DW_LANG_C, di_package, package->get_name(), false, "", 0);
145  // NOTE: compilation unit is bound to package-level DIFile, so we must set the root scope to this so that all
146  // top-level declarations are bound to it as well.
147  _di_scope.push_back(di_package);
148 }
149 
150 CodeGenerator::CodeGenerator(TargetMachine *target_machine) : _target_machine(target_machine) {}
151 
152 CodeGenerator::~CodeGenerator() {
153  if (_di_builder)
154  delete _di_builder;
155  if (_module)
156  delete _module;
157  if (_builder)
158  delete _builder;
159  if (_llvm_ctx)
160  delete _llvm_ctx;
161 }
162 
163 void CodeGenerator::run_impl(Package *package) {
164  // Not using cached_visit because we package doesn't have a source file instance, and there's no need to set debug
165  // location or cache value of this node.
166  visit(package);
167 
168  _di_builder->finalize(); // do this before any pass
169 
170  run_passes();
171 }
172 
173 llvm::Value *CodeGenerator::cached_visit(ASTBase *p) {
174  auto it = _llvm_value_cache.find(p);
175  if (it != _llvm_value_cache.end()) {
176  return it->second;
177  }
178  set_current_debug_location(p);
179 
180  visit(p);
181 
182  return _llvm_value_cache[p];
183 }
184 
185 void CodeGenerator::default_visit(ASTBase *) { TAN_ASSERT(false); }
186 
187 void CodeGenerator::run_passes() {
188  auto opt_level = _target_machine->getOptLevel();
189  bool debug = opt_level == llvm::CodeGenOpt::Level::None;
190 
191  if (!debug) {
192  // Create the analysis managers
193  LoopAnalysisManager LAM;
194  FunctionAnalysisManager FAM;
195  CGSCCAnalysisManager CGAM;
196  ModuleAnalysisManager MAM;
197 
198  // Create the new pass manager builder
199  PassBuilder PB(_target_machine);
200 
201  // Register all the basic analyses with the managers
202  PB.registerModuleAnalyses(MAM);
203  PB.registerCGSCCAnalyses(CGAM);
204  PB.registerFunctionAnalyses(FAM);
205  PB.registerLoopAnalyses(LAM);
206  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
207 
208  // Create the pass manager.
209  // TODO: opt level
210  // FIXME: string.tan failed if using optimization
211  ModulePassManager MPM = PB.buildPerModuleDefaultPipeline(llvm::OptimizationLevel::O2);
212  // Optimize the IR
213  MPM.run(*_module, MAM);
214  }
215 }
216 
217 void CodeGenerator::emit_to_file(const str &filename) {
218  std::error_code ec;
219  llvm::raw_fd_ostream dest(filename, ec, llvm::sys::fs::OF_None);
220  if (ec) {
221  Error err("Could not open file: " + ec.message());
222  err.raise();
223  }
224  PassManager emit_pass;
225  auto file_type = llvm::CGFT_ObjectFile;
226  if (_target_machine->addPassesToEmitFile(emit_pass, dest, nullptr, file_type)) {
227  Error err("Target machine can't emit a file of this type");
228  err.raise();
229  }
230  emit_pass.run(*_module);
231  dest.flush();
232 }
233 
234 void CodeGenerator::dump_ir() const { _module->print(llvm::outs(), nullptr); }
235 
236 // ===================================================
237 
238 DIFile *CodeGenerator::get_or_create_di_file(ASTBase *p) {
239  auto *src = p->src();
240 
241  auto q = _di_files.find(src);
242  if (q != _di_files.end()) {
243  return q->second;
244  }
245 
246  auto *ret = _di_builder->createFile(src->get_filename(), ".");
247  _di_files[src] = ret;
248 
249  return ret;
250 }
251 
252 AllocaInst *CodeGenerator::create_block_alloca(BasicBlock *block, llvm::Type *type, size_t size, const str &name) {
253  block = &block->getParent()->getEntryBlock();
254  IRBuilder<> tmp_builder(block, block->begin());
255  if (size <= 1) {
256  return tmp_builder.CreateAlloca(type, nullptr, name);
257  } else {
258  return tmp_builder.CreateAlloca(type, tmp_builder.getInt32((unsigned)size), name);
259  }
260 }
261 
262 llvm::Value *CodeGenerator::convert_llvm_type_to(Expr *expr, Type *dest) {
263  /// load if lvalue
264  Value *loaded = load_if_is_lvalue(expr);
265 
266  Type *orig = expr->get_type();
267 
268  bool is_pointer1 = orig->is_pointer();
269  bool is_pointer2 = dest->is_pointer();
270 
271  /// early return if types are the same
272  if (orig == dest) {
273  return loaded;
274  };
275  if (is_pointer1 && is_pointer2) {
276  /// cast between pointer types (including pointers to pointers)
277  return _builder->CreateBitCast(loaded, to_llvm_type(dest));
278  } else if ((orig->is_int() || orig->is_char()) && (dest->is_char() || dest->is_int())) { /// between int
279  if (dest->is_unsigned())
280  return _builder->CreateZExtOrTrunc(loaded, to_llvm_type(dest));
281  else
282  return _builder->CreateSExtOrTrunc(loaded, to_llvm_type(dest));
283  } else if (orig->is_int() && dest->is_float()) { /// int to float/double
284  if (orig->is_unsigned()) {
285  return _builder->CreateUIToFP(loaded, to_llvm_type(dest));
286  } else {
287  return _builder->CreateSIToFP(loaded, to_llvm_type(dest));
288  }
289  } else if (orig->is_float() && dest->is_int()) { /// float/double to int
290  if (dest->is_unsigned()) {
291  return _builder->CreateFPToUI(loaded, to_llvm_type(dest));
292  } else {
293  return _builder->CreateFPToSI(loaded, to_llvm_type(dest));
294  }
295  } else if (orig->is_float() && dest->is_float()) { /// float <-> double
296  return _builder->CreateFPCast(loaded, to_llvm_type(dest));
297  } else if (orig->is_bool() && dest->is_int()) { /// bool to int
298  return _builder->CreateZExtOrTrunc(loaded, to_llvm_type(dest));
299  } else if (orig->is_bool() && dest->is_float()) { /// bool to float
300  return _builder->CreateUIToFP(loaded, to_llvm_type(dest));
301  } else if (dest->is_bool()) {
302  if (orig->is_float()) { /// float to bool
303  if (orig->get_size_bits() == 32) {
304  return _builder->CreateFCmpONE(loaded, ConstantFP::get(_builder->getFloatTy(), 0.0f));
305  } else {
306  return _builder->CreateFCmpONE(loaded, ConstantFP::get(_builder->getDoubleTy(), 0.0f));
307  }
308  } else if (orig->is_pointer()) { /// pointer to bool
309  size_t s1 = _target_machine->getPointerSizeInBits(0);
310  loaded = _builder->CreatePtrToInt(loaded, _builder->getIntNTy((unsigned)s1));
311  return _builder->CreateICmpNE(loaded, ConstantInt::get(_builder->getIntNTy((unsigned)s1), 0, false));
312  } else if (orig->is_int()) { /// int to bool
313  return _builder->CreateICmpNE(loaded,
314  ConstantInt::get(_builder->getIntNTy((unsigned)orig->get_size_bits()), 0, false));
315  }
316  } else if (orig->is_string() && dest->is_pointer()) { /// string to pointer, don't need to do anything
317  return loaded;
318  } else if (orig->is_array() && dest->is_pointer()) { /// array to pointer, don't need to do anything
319  return loaded;
320  } else if (orig->is_array() && dest->is_string()) { /// array to string, don't need to do anything
321  return loaded;
322  }
323 
324  error(ErrorType::SEMANTIC_ERROR, expr, "Cannot perform type conversion");
325 }
326 
327 llvm::Value *CodeGenerator::load_if_is_lvalue(Expr *expr) {
328  Value *val = _llvm_value_cache[expr];
329  TAN_ASSERT(val);
330 
331  if (expr->is_lvalue()) {
332  return _builder->CreateLoad(to_llvm_type(expr->get_type()), val, "lvalue_load");
333  }
334  return val;
335 }
336 
337 llvm::Type *CodeGenerator::to_llvm_type(Type *p) {
338  TAN_ASSERT(p);
339  TAN_ASSERT(!p->is_ref());
340 
341  auto it = _llvm_type_cache.find(p);
342  if (it != _llvm_type_cache.end()) {
343  return it->second;
344  }
345 
346  llvm::Type *ret = nullptr;
347 
348  if (p->is_primitive()) { /// primitive types
349  int size_bits = p->get_size_bits();
350  if (p->is_int()) {
351  ret = _builder->getIntNTy((unsigned)size_bits);
352  } else if (p->is_char()) {
353  ret = _builder->getInt8Ty();
354  } else if (p->is_bool()) {
355  ret = _builder->getInt1Ty();
356  } else if (p->is_float()) {
357  if (32 == size_bits) {
358  ret = _builder->getFloatTy();
359  } else if (64 == size_bits) {
360  ret = _builder->getDoubleTy();
361  } else {
362  TAN_ASSERT(false);
363  }
364  } else if (p->is_void()) {
365  ret = _builder->getVoidTy();
366  }
367  } else if (p->is_string()) { /// str as char*
368  ret = _builder->getInt8PtrTy();
369  } else if (p->is_struct()) { /// struct
370  // avoid infinite recursion
371  _llvm_type_cache[p] = ret = llvm::StructType::create(*_llvm_ctx, p->get_typename());
372  auto types = pcast<StructType>(p)->get_member_types();
373  vector<llvm::Type *> elements(types.size(), nullptr);
374  for (size_t i = 0; i < types.size(); ++i) {
375  elements[i] = to_llvm_type(types[i]);
376  }
377  ((llvm::StructType *)ret)->setBody(elements);
378  } else if (p->is_array()) { /// array as pointer
379  auto *e_type = to_llvm_type(pcast<ArrayType>(p)->get_element_type());
380  ret = e_type->getPointerTo();
381  } else if (p->is_pointer()) { /// pointer
382  auto *e_type = to_llvm_type(pcast<PointerType>(p)->get_pointee());
383  ret = e_type->getPointerTo();
384  } else if (p->is_function()) {
385  auto *func_type = pcast<tanlang::FunctionType>(p);
386  vector<llvm::Type *> arg_types{};
387  for (auto *t : func_type->get_arg_types()) {
388  arg_types.push_back(to_llvm_type(t));
389  }
390  auto *ret_type = to_llvm_type(func_type->get_return_type());
391  ret = llvm::FunctionType::get(ret_type, arg_types, false);
392  } else {
393  TAN_ASSERT(false);
394  }
395 
396  _llvm_type_cache[p] = ret;
397  return ret;
398 }
399 
400 llvm::Metadata *CodeGenerator::to_llvm_metadata(Type *p, DIFile *di_file, uint32_t lineno) {
401  TAN_ASSERT(p);
402  TAN_ASSERT(!p->is_ref());
403 
404  auto it = _llvm_meta_cache.find(p);
405  if (it != _llvm_meta_cache.end()) {
406  return it->second;
407  }
408 
409  DIType *ret = nullptr;
410  if (p->is_primitive()) { /// primitive types
411  unsigned dwarf_encoding = 0;
412  int size_bits = p->get_size_bits();
413  if (p->is_int()) {
414  if (p->is_unsigned()) {
415  if (size_bits == 8) {
416  dwarf_encoding = llvm::dwarf::DW_ATE_unsigned_char;
417  } else {
418  dwarf_encoding = llvm::dwarf::DW_ATE_unsigned;
419  }
420  } else {
421  if (size_bits == 8) {
422  dwarf_encoding = llvm::dwarf::DW_ATE_signed_char;
423  } else {
424  dwarf_encoding = llvm::dwarf::DW_ATE_signed;
425  }
426  }
427  } else if (p->is_char()) {
428  dwarf_encoding = llvm::dwarf::DW_ATE_signed_char;
429  } else if (p->is_bool()) {
430  dwarf_encoding = llvm::dwarf::DW_ATE_boolean;
431  } else if (p->is_float()) {
432  dwarf_encoding = llvm::dwarf::DW_ATE_float;
433  } else if (p->is_void()) {
434  dwarf_encoding = llvm::dwarf::DW_ATE_signed;
435  }
436 
437  ret = _di_builder->createBasicType(p->get_typename(), (uint64_t)size_bits, dwarf_encoding);
438  } else if (p->is_string()) { /// str as char*
439  auto *e_di_type = _di_builder->createBasicType("u8", 8, llvm::dwarf::DW_ATE_unsigned_char);
440  ret = _di_builder->createPointerType(e_di_type, _target_machine->getPointerSizeInBits(0),
441  _target_machine->getPointerSizeInBits(0), std::nullopt, p->get_typename());
442  } else if (p->is_struct()) { /// struct
443  auto member_types = pcast<StructType>(p)->get_member_types();
444  unsigned n = (unsigned)member_types.size();
445 
446  // avoid infinite recursion by inserting a placeholder
447  ret = _di_builder->createStructType(
448  di_file, p->get_typename(), di_file, (unsigned)lineno, (uint32_t)p->get_size_bits(),
449  (uint32_t)p->get_align_bits(), DINode::DIFlags::FlagZero, nullptr,
450  _di_builder->getOrCreateArray(vector<Metadata *>(n, nullptr)), 0, nullptr, p->get_typename());
451 
452  vector<Metadata *> elements(member_types.size(), nullptr);
453  for (unsigned i = 0; i < n; ++i) {
454  elements[i] = to_llvm_metadata(member_types[i], di_file, lineno);
455  }
456  // work around replaceElements()'s check
457  ret->replaceOperandWith(4, _di_builder->getOrCreateArray(elements).get());
458  } else if (p->is_array()) { /// array as pointer
459  auto *sub = to_llvm_metadata(pcast<ArrayType>(p)->get_element_type(), di_file, lineno);
460  ret = _di_builder->createPointerType((DIType *)sub, _target_machine->getPointerSizeInBits(0),
461  _target_machine->getPointerSizeInBits(0), std::nullopt, p->get_typename());
462  } else if (p->is_pointer()) { /// pointer
463  // avoid infinite recursion by inserting a placeholder
464  _llvm_meta_cache[p] = ret =
465  _di_builder->createPointerType(nullptr, _target_machine->getPointerSizeInBits(0),
466  _target_machine->getPointerSizeInBits(0), std::nullopt, p->get_typename());
467  auto *sub = to_llvm_metadata(pcast<PointerType>(p)->get_pointee(), di_file, lineno);
468  ret->replaceOperandWith(3, sub);
469  } else {
470  TAN_ASSERT(false);
471  }
472 
473  return _llvm_meta_cache[p] = ret;
474 }
475 
476 llvm::DISubroutineType *CodeGenerator::create_function_debug_info_type(llvm::Metadata *ret,
477  vector<llvm::Metadata *> args) {
478  vector<Metadata *> types{ret};
479  types.reserve(args.size());
480  types.insert(types.begin() + 1, args.begin(), args.end());
481  // return _di_builder->createSubroutineType(_di_builder->getOrCreateTypeArray(types), DINode::FlagZero,
482  // llvm::dwarf::DW_CC_normal);
483  return _di_builder->createSubroutineType(_di_builder->getOrCreateTypeArray(types));
484 }
485 
486 void CodeGenerator::set_current_debug_location(ASTBase *p) {
487  unsigned line = AST_LINENO(p) + 1;
488  unsigned col = AST_COL(p) + 1;
489  _builder->SetCurrentDebugLocation(DILocation::get(*_llvm_ctx, line, col, get_current_di_scope()));
490 }
491 
492 DIScope *CodeGenerator::get_current_di_scope() const { return _di_scope.back(); }
493 void CodeGenerator::push_di_scope(DIScope *scope) { _di_scope.push_back(scope); }
494 void CodeGenerator::pop_di_scope() { _di_scope.pop_back(); }
495 
496 DebugLoc CodeGenerator::debug_loc_of_node(ASTBase *p, MDNode *scope) {
497  return DILocation::get(*_llvm_ctx, AST_LINENO(p), AST_COL(p), scope);
498 }
499 
500 // ===================================================
501 
502 Value *CodeGenerator::codegen_var_arg_decl(Decl *p) {
503  llvm::Type *type = to_llvm_type(p->get_type());
504  auto *ret = create_block_alloca(_builder->GetInsertBlock(), type, 1, p->get_name());
505 
506  // default value of var declaration
507  if (p->get_node_type() == ASTNodeType::VAR_DECL) {
508  Value *default_value = codegen_type_default_value(p->get_type());
509  TAN_ASSERT(default_value);
510  _builder->CreateStore(default_value, ret);
511  }
512 
513  // debug info
514  auto *curr_di_scope = get_current_di_scope();
515  auto *di_file = get_or_create_di_file(p);
516  auto *arg_meta = to_llvm_metadata(p->get_type(), di_file, AST_LINENO(p));
517  auto *di_arg =
518  _di_builder->createAutoVariable(curr_di_scope, p->get_name(), di_file, AST_LINENO(p), (DIType *)arg_meta);
519  _di_builder->insertDeclare(ret, di_arg, _di_builder->createExpression(), debug_loc_of_node(p, curr_di_scope),
520  _builder->GetInsertBlock());
521  return ret;
522 }
523 
524 Value *CodeGenerator::codegen_struct_default_value(StructType *ty) {
525  StructDecl *struct_decl = ty->get_decl();
526 
527  auto member_types = ty->get_member_types();
528  TAN_ASSERT(member_types.size() == struct_decl->get_member_decls().size());
529 
530  vector<Constant *> values(member_types.size(), nullptr);
531  for (size_t i = 0; i < member_types.size(); ++i) {
532  Expr *v = struct_decl->get_member_default_val((int)i);
533 
534  if (v) {
535  TAN_ASSERT(v->is_comptime_known());
536  // default value is set in the struct definition
537  values[i] = (llvm::Constant *)cached_visit(v);
538  } else {
539  values[i] = (llvm::Constant *)codegen_type_default_value(member_types[i]);
540  }
541  }
542 
543  return ConstantStruct::get((llvm::StructType *)to_llvm_type(ty), values);
544 }
545 
546 Value *CodeGenerator::codegen_type_default_value(Type *p) {
547  TAN_ASSERT(!p->is_ref());
548 
549  // FIXME: might need something better
550  auto *src = new TokenizedSourceFile("__plain_type_default_value__", {});
551 
552  Value *ret = nullptr;
553  if (p->is_primitive() || p->is_string() || p->is_array() || p->is_pointer()) {
554  ret = cached_visit(DefaultValue::CreateTypeDefaultValueLiteral(src, p));
555 
556  } else if (p->is_struct()) {
557  ret = codegen_struct_default_value(pcast<StructType>(p));
558  } else {
559  TAN_ASSERT(false);
560  }
561 
562  return ret;
563 }
564 
565 Value *CodeGenerator::codegen_literals(Literal *p) {
566  llvm::Type *type = to_llvm_type(p->get_type());
567  Value *ret = nullptr;
568  Type *ptype = p->get_type();
569  if (ptype->is_primitive()) { /// primitive types
570  int size_bits = ptype->get_size_bits();
571 
572  if (ptype->is_char()) { // NOTE: must be before is_int() check because char is technically an integer
573  ret = ConstantInt::get(type, pcast<CharLiteral>(p)->get_value());
574  } else if (ptype->is_int()) {
575  auto pp = pcast<IntegerLiteral>(p);
576  ret = ConstantInt::get(_builder->getIntNTy((unsigned)size_bits), pp->get_value(), !pp->is_unsigned());
577  } else if (ptype->is_bool()) {
578  auto pp = pcast<BoolLiteral>(p);
579  ret = ConstantInt::get(type, (uint64_t)pp->get_value());
580  } else if (ptype->is_float()) {
581  ret = ConstantFP::get(type, pcast<FloatLiteral>(p)->get_value());
582  } else {
583  TAN_ASSERT(false);
584  }
585  } else if (ptype->is_string()) { /// str as char*
586  ret = _builder->CreateGlobalStringPtr(pcast<StringLiteral>(p)->get_value());
587  } else if (ptype->is_struct()) { /// struct
588  // TODO: Implement struct literal
589  TAN_ASSERT(false);
590  } else if (ptype->is_array()) { /// array as pointer
591  auto arr = pcast<ArrayLiteral>(p);
592 
593  /// element type
594  auto elements = arr->get_elements();
595  auto *e_type = to_llvm_type(pcast<ArrayType>(ptype)->get_element_type());
596 
597  /// codegen element values
598  size_t n = elements.size();
599  ret = create_block_alloca(_builder->GetInsertBlock(), e_type, n, "array_storage");
600  for (size_t i = 0; i < n; ++i) {
601  auto *idx = _builder->getInt32((unsigned)i);
602  auto *e_val = cached_visit(elements[i]);
603  auto *e_ptr = _builder->CreateGEP(e_type, ret, idx);
604  _builder->CreateStore(e_val, e_ptr);
605  }
606  } else if (ptype->is_pointer()) { /// the pointer literal is nullptr
607  ret = ConstantPointerNull::get((llvm::PointerType *)type);
608  } else {
609  TAN_ASSERT(false);
610  }
611 
612  return ret;
613 }
614 
615 Value *CodeGenerator::codegen_func_prototype(FunctionDecl *p, bool import_) {
616  auto linkage = Function::InternalLinkage;
617  if (p->is_external()) {
618  linkage = Function::ExternalWeakLinkage;
619  }
620  if (p->is_public()) {
621  if (import_) {
622  linkage = Function::ExternalWeakLinkage;
623  } else {
624  linkage = Function::ExternalLinkage;
625  }
626  }
627  Function *func = Function::Create((llvm::FunctionType *)to_llvm_type(p->get_type()), linkage, p->get_name(), _module);
628  func->setCallingConv(llvm::CallingConv::C);
629  return func;
630 }
631 
632 Value *CodeGenerator::codegen_ptr_deref(UnaryOperator *p) {
633  auto *rhs = p->get_rhs();
634  Value *val = cached_visit(rhs);
635  TAN_ASSERT(val->getType()->isPointerTy());
636 
637  /// load only if the pointer itself is an lvalue, so that the value after deref is always an lvalue
638  if (rhs->is_lvalue()) {
639  val = _builder->CreateLoad(to_llvm_type(rhs->get_type()), val, "ptr_deref");
640  }
641  return val;
642 }
643 
644 Value *CodeGenerator::codegen_relop(BinaryOperator *p) {
645  auto lhs = p->get_lhs();
646  auto rhs = p->get_rhs();
647  Value *l = cached_visit(lhs);
648  Value *r = cached_visit(rhs);
649 
650  r = load_if_is_lvalue(rhs);
651  l = load_if_is_lvalue(lhs);
652 
653  Value *ret = nullptr;
654  switch (p->get_op()) {
655  case BinaryOpKind::BAND:
656  ret = _builder->CreateAnd(l, r, "binary_and");
657  break;
658  case BinaryOpKind::LAND:
659  ret = _builder->CreateAnd(l, r, "logical_and");
660  break;
661  case BinaryOpKind::BOR:
662  ret = _builder->CreateOr(l, r, "binary_or");
663  break;
664  case BinaryOpKind::LOR:
665  ret = _builder->CreateOr(l, r, "logical_or");
666  break;
667  case BinaryOpKind::XOR:
668  ret = _builder->CreateXor(l, r, "logical_or");
669  break;
670  default:
671  TAN_ASSERT(false);
672  break;
673  }
674 
675  TAN_ASSERT(ret);
676  return ret;
677 }
678 
679 Value *CodeGenerator::codegen_bnot(UnaryOperator *p) {
680  auto *rhs = cached_visit(p->get_rhs());
681  if (!rhs) {
682  error(ErrorType::SEMANTIC_ERROR, p, "Invalid operand");
683  }
684  if (p->get_rhs()->is_lvalue()) {
685  rhs = _builder->CreateLoad(to_llvm_type(p->get_rhs()->get_type()), rhs);
686  }
687  return _builder->CreateNot(rhs);
688 }
689 
690 Value *CodeGenerator::codegen_lnot(UnaryOperator *p) {
691  auto *rhs = cached_visit(p->get_rhs());
692 
693  if (!rhs) {
694  error(ErrorType::SEMANTIC_ERROR, p, "Invalid operand");
695  }
696 
697  if (p->get_rhs()->is_lvalue()) {
698  rhs = _builder->CreateLoad(to_llvm_type(p->get_rhs()->get_type()), rhs);
699  }
700  /// get value size in bits
701  auto size_in_bits = rhs->getType()->getPrimitiveSizeInBits();
702  if (rhs->getType()->isFloatingPointTy()) {
703  return _builder->CreateFCmpOEQ(rhs, ConstantFP::get(_builder->getFloatTy(), 0.0f));
704  } else if (rhs->getType()->isSingleValueType()) {
705  return _builder->CreateICmpEQ(rhs, ConstantInt::get(_builder->getIntNTy((unsigned)size_in_bits), 0, false));
706  }
707 
708  error(ErrorType::SEMANTIC_ERROR, p, "Invalid operand");
709 }
710 
711 Value *CodeGenerator::codegen_address_of(UnaryOperator *p) {
712  if (!p->get_rhs()->is_lvalue()) {
713  error(ErrorType::SEMANTIC_ERROR, p, "Cannot get address of rvalue");
714  }
715 
716  return cached_visit(p->get_rhs());
717 }
718 
719 Value *CodeGenerator::codegen_arithmetic(BinaryOperator *p) {
720  /// binary operator
721  auto *lhs = p->get_lhs();
722  auto *rhs = p->get_rhs();
723  Value *l = cached_visit(lhs);
724  Value *r = cached_visit(rhs);
725  r = load_if_is_lvalue(rhs);
726  l = load_if_is_lvalue(lhs);
727 
728  Value *ret = nullptr;
729  if (l->getType()->isFloatingPointTy()) {
730  /// float arithmetic
731  switch (p->get_op()) {
732  case BinaryOpKind::MULTIPLY:
733  ret = _builder->CreateFMul(l, r, "mul_tmp");
734  break;
735  case BinaryOpKind::DIVIDE:
736  ret = _builder->CreateFDiv(l, r, "div_tmp");
737  break;
738  case BinaryOpKind::SUM:
739  ret = _builder->CreateFAdd(l, r, "sum_tmp");
740  break;
741  case BinaryOpKind::SUBTRACT:
742  ret = _builder->CreateFSub(l, r, "sub_tmp");
743  break;
744  case BinaryOpKind::MOD:
745  ret = _builder->CreateFRem(l, r, "mod_tmp");
746  break;
747  default:
748  TAN_ASSERT(false);
749  break;
750  }
751  } else {
752  /// integer arithmetic
753  switch (p->get_op()) {
754  case BinaryOpKind::MULTIPLY:
755  ret = _builder->CreateMul(l, r, "mul_tmp");
756  break;
757  case BinaryOpKind::DIVIDE: {
758  auto ty = lhs->get_type();
759  if (ty->is_unsigned()) {
760  ret = _builder->CreateUDiv(l, r, "div_tmp");
761  } else {
762  ret = _builder->CreateSDiv(l, r, "div_tmp");
763  }
764  break;
765  }
766  case BinaryOpKind::SUM:
767  ret = _builder->CreateAdd(l, r, "sum_tmp");
768  break;
769  case BinaryOpKind::SUBTRACT:
770  ret = _builder->CreateSub(l, r, "sub_tmp");
771  break;
772  case BinaryOpKind::MOD: {
773  auto ty = lhs->get_type();
774  if (ty->is_unsigned()) {
775  ret = _builder->CreateURem(l, r, "mod_tmp");
776  } else {
777  ret = _builder->CreateSRem(l, r, "mod_tmp");
778  }
779  break;
780  }
781  default:
782  TAN_ASSERT(false);
783  break;
784  }
785  }
786 
787  TAN_ASSERT(ret);
788  return ret;
789 }
790 
791 Value *CodeGenerator::codegen_comparison(BinaryOperator *p) {
792  auto lhs = p->get_lhs();
793  auto rhs = p->get_rhs();
794  Value *l = cached_visit(lhs);
795  Value *r = cached_visit(rhs);
796 
797  bool is_signed = !lhs->get_type()->is_unsigned();
798  r = load_if_is_lvalue(rhs);
799  l = load_if_is_lvalue(lhs);
800 
801  Value *ret = nullptr;
802  if (l->getType()->isFloatingPointTy()) {
803  switch (p->get_op()) {
804  case BinaryOpKind::EQ:
805  ret = _builder->CreateFCmpOEQ(l, r, "eq");
806  break;
807  case BinaryOpKind::NE:
808  ret = _builder->CreateFCmpONE(l, r, "ne");
809  break;
810  case BinaryOpKind::GT:
811  ret = _builder->CreateFCmpOGT(l, r, "gt");
812  break;
813  case BinaryOpKind::GE:
814  ret = _builder->CreateFCmpOGE(l, r, "ge");
815  break;
816  case BinaryOpKind::LT:
817  ret = _builder->CreateFCmpOLT(l, r, "lt");
818  break;
819  case BinaryOpKind::LE:
820  ret = _builder->CreateFCmpOLE(l, r, "le");
821  break;
822  default:
823  TAN_ASSERT(false);
824  break;
825  }
826  } else {
827  switch (p->get_op()) {
828  case BinaryOpKind::EQ:
829  ret = _builder->CreateICmpEQ(l, r, "eq");
830  break;
831  case BinaryOpKind::NE:
832  ret = _builder->CreateICmpNE(l, r, "ne");
833  break;
834  case BinaryOpKind::GT:
835  if (is_signed) {
836  ret = _builder->CreateICmpSGT(l, r, "gt");
837  } else {
838  ret = _builder->CreateICmpUGT(l, r, "gt");
839  }
840  break;
841  case BinaryOpKind::GE:
842  if (is_signed) {
843  ret = _builder->CreateICmpSGE(l, r, "ge");
844  } else {
845  ret = _builder->CreateICmpUGE(l, r, "ge");
846  }
847  break;
848  case BinaryOpKind::LT:
849  if (is_signed) {
850  ret = _builder->CreateICmpSLT(l, r, "lt");
851  } else {
852  ret = _builder->CreateICmpULT(l, r, "lt");
853  }
854  break;
855  case BinaryOpKind::LE:
856  if (is_signed) {
857  ret = _builder->CreateICmpSLE(l, r, "le");
858  } else {
859  ret = _builder->CreateICmpULE(l, r, "le");
860  }
861  break;
862  default:
863  TAN_ASSERT(false);
864  break;
865  }
866  }
867 
868  TAN_ASSERT(ret);
869  return ret;
870 }
871 
872 Value *CodeGenerator::codegen_member_access(BinaryOperator *_p) {
873  auto *p = pcast<MemberAccess>(_p);
874  auto lhs = p->get_lhs();
875  auto rhs = p->get_rhs();
876 
877  auto *lhs_val = cached_visit(lhs);
878 
879  Value *ret = nullptr;
880  switch (p->_access_type) {
881  case MemberAccess::MemberAccessBracket: {
882  lhs_val = load_if_is_lvalue(lhs);
883 
884  cached_visit(rhs);
885  auto *rhs_val = load_if_is_lvalue(rhs);
886 
887  llvm::Type *element_type = nullptr;
888  if (lhs->get_type()->is_array()) { /// array
889  auto *lhs_type = pcast<tanlang::ArrayType>(lhs->get_type());
890  element_type = to_llvm_type(lhs_type->get_element_type());
891  } else if (lhs->get_type()->is_string()) { /// string
892  element_type = llvm::Type::getInt8Ty(*_llvm_ctx);
893  } else if (lhs->get_type()->is_pointer()) { /// pointer
894  auto *lhs_type = pcast<tanlang::PointerType>(lhs->get_type());
895  element_type = to_llvm_type(lhs_type->get_pointee());
896  } else {
897  TAN_ASSERT(false);
898  }
899  ret = _builder->CreateGEP(element_type, lhs_val, rhs_val, "bracket_access");
900  break;
901  }
902  case MemberAccess::MemberAccessMemberVariable: {
903  StructType *st = nullptr;
904  if (lhs->get_type()->is_pointer()) { /// auto dereference pointers
905  lhs_val = load_if_is_lvalue(lhs);
906  st = pcast<StructType>(pcast<PointerType>(lhs->get_type())->get_pointee());
907  } else {
908  st = pcast<StructType>(lhs->get_type());
909  }
910  TAN_ASSERT(st->is_struct());
911  TAN_ASSERT(lhs_val->getType()->isPointerTy());
912 
913  lhs_val->getType()->print(llvm::outs());
914  ret = _builder->CreateStructGEP(to_llvm_type(st), lhs_val, (unsigned)p->_access_idx, "member_variable");
915  break;
916  }
917  case MemberAccess::MemberAccessMemberFunction:
918  // TODO: codegen for member function call
919  ret = cached_visit(rhs);
920  break;
921  default:
922  TAN_ASSERT(false);
923  }
924 
925  return ret;
926 }
927 
928 // ===================================================
929 
930 DEFINE_AST_VISITOR_IMPL(CodeGenerator, Package) {
931  for (auto *c : p->get_children()) {
932  cached_visit(c);
933  }
934 }
935 
936 DEFINE_AST_VISITOR_IMPL(CodeGenerator, Identifier) {
937  switch (p->get_id_type()) {
938  case IdentifierType::ID_VAR_REF:
939  _llvm_value_cache[p] = cached_visit(p->get_var_ref());
940  break;
941  case IdentifierType::ID_TYPE_REF:
942  default:
943  TAN_ASSERT(false);
944  break;
945  }
946 }
947 
948 DEFINE_AST_VISITOR_IMPL(CodeGenerator, Parenthesis) { _llvm_value_cache[p] = cached_visit(p->get_sub()); }
949 
950 DEFINE_AST_VISITOR_IMPL(CodeGenerator, If) {
951  Function *func = _builder->GetInsertBlock()->getParent();
952  size_t n = p->get_num_branches();
953 
954  /// create basic blocks
955  vector<BasicBlock *> cond_blocks(n);
956  vector<BasicBlock *> then_blocks(n);
957  for (size_t i = 0; i < n; ++i) {
958  cond_blocks[i] = BasicBlock::Create(*_llvm_ctx, "cond", func);
959  then_blocks[i] = BasicBlock::Create(*_llvm_ctx, "branch", func);
960  }
961  BasicBlock *merge_bb = BasicBlock::Create(*_llvm_ctx, "endif", func);
962 
963  /// codegen branches
964  _builder->CreateBr(cond_blocks[0]);
965  for (size_t i = 0; i < n; ++i) {
966  /// condition
967  _builder->SetInsertPoint(cond_blocks[i]);
968 
969  Expr *cond = p->get_predicate(i);
970  if (!cond) { /// else clause, immediately go to then block
971  TAN_ASSERT(i == n - 1); /// only the last branch can be an else
972  _builder->CreateBr(then_blocks[i]);
973  } else {
974  cached_visit(cond);
975  Value *cond_v = load_if_is_lvalue(cond);
976  if (i < n - 1) {
977  _builder->CreateCondBr(cond_v, then_blocks[i], cond_blocks[i + 1]);
978  } else {
979  _builder->CreateCondBr(cond_v, then_blocks[i], merge_bb);
980  }
981  }
982 
983  /// then clause
984  _builder->SetInsertPoint(then_blocks[i]);
985  cached_visit(p->get_branch(i));
986 
987  /// go to merge block if there is no terminator instruction at the end of then
988  if (!_builder->GetInsertBlock()->back().isTerminator()) {
989  _builder->CreateBr(merge_bb);
990  }
991  }
992 
993  /// emit merge block
994  _builder->SetInsertPoint(merge_bb);
995 
996  _llvm_value_cache[p] = nullptr;
997 }
998 
999 DEFINE_AST_VISITOR_IMPL(CodeGenerator, VarDecl) { _llvm_value_cache[p] = codegen_var_arg_decl(p); }
1000 
1001 DEFINE_AST_VISITOR_IMPL(CodeGenerator, ArgDecl) { _llvm_value_cache[p] = codegen_var_arg_decl(p); }
1002 
1003 DEFINE_AST_VISITOR_IMPL(CodeGenerator, Return) {
1004  auto rhs = p->get_rhs();
1005  if (rhs) { /// return with value
1006  Value *result = cached_visit(rhs);
1007  if (rhs->is_lvalue()) {
1008  result = _builder->CreateLoad(to_llvm_type(rhs->get_type()), result);
1009  }
1010  _builder->CreateRet(result);
1011  } else { /// return void
1012  _builder->CreateRetVoid();
1013  }
1014  _llvm_value_cache[p] = nullptr;
1015 }
1016 
1017 DEFINE_AST_VISITOR_IMPL(CodeGenerator, CompoundStmt) {
1018  for (auto *c : p->get_children()) {
1019  cached_visit(c);
1020  }
1021  _llvm_value_cache[p] = nullptr;
1022 }
1023 
1024 DEFINE_AST_VISITOR_IMPL(CodeGenerator, BinaryOrUnary) { _llvm_value_cache[p] = cached_visit(p->get_expr_ptr()); }
1025 
1026 DEFINE_AST_VISITOR_IMPL(CodeGenerator, BinaryOperator) {
1027  Value *ret = nullptr;
1028 
1029  switch (p->get_op()) {
1030  case BinaryOpKind::SUM:
1031  case BinaryOpKind::SUBTRACT:
1032  case BinaryOpKind::MULTIPLY:
1033  case BinaryOpKind::DIVIDE:
1034  case BinaryOpKind::MOD:
1035  ret = codegen_arithmetic(p);
1036  break;
1037  case BinaryOpKind::BAND:
1038  case BinaryOpKind::LAND:
1039  case BinaryOpKind::BOR:
1040  case BinaryOpKind::LOR:
1041  case BinaryOpKind::XOR:
1042  ret = codegen_relop(p);
1043  break;
1044  case BinaryOpKind::GT:
1045  case BinaryOpKind::GE:
1046  case BinaryOpKind::LT:
1047  case BinaryOpKind::LE:
1048  case BinaryOpKind::EQ:
1049  case BinaryOpKind::NE:
1050  ret = codegen_comparison(p);
1051  break;
1052  case BinaryOpKind::MEMBER_ACCESS:
1053  ret = codegen_member_access(p);
1054  break;
1055  default:
1056  TAN_ASSERT(false);
1057  break;
1058  }
1059 
1060  _llvm_value_cache[p] = ret;
1061 }
1062 
1063 DEFINE_AST_VISITOR_IMPL(CodeGenerator, UnaryOperator) {
1064  Value *ret = nullptr;
1065 
1066  auto rhs = p->get_rhs();
1067  switch (p->get_op()) {
1068  case UnaryOpKind::LNOT:
1069  ret = codegen_lnot(p);
1070  break;
1071  case UnaryOpKind::BNOT:
1072  ret = codegen_bnot(p);
1073  break;
1074  case UnaryOpKind::ADDRESS_OF:
1075  ret = codegen_address_of(p);
1076  break;
1077  case UnaryOpKind::PTR_DEREF:
1078  ret = codegen_ptr_deref(p);
1079  break;
1080  case UnaryOpKind::PLUS:
1081  ret = cached_visit(rhs);
1082  break;
1083  case UnaryOpKind::MINUS: {
1084  auto *r = cached_visit(rhs);
1085  if (rhs->is_lvalue()) {
1086  r = _builder->CreateLoad(to_llvm_type(rhs->get_type()), r);
1087  }
1088  if (r->getType()->isFloatingPointTy()) {
1089  ret = _builder->CreateFNeg(r);
1090  } else {
1091  ret = _builder->CreateNeg(r);
1092  }
1093  break;
1094  }
1095  default:
1096  TAN_ASSERT(false);
1097  break;
1098  }
1099 
1100  _llvm_value_cache[p] = ret;
1101 }
1102 
1103 DEFINE_AST_VISITOR_IMPL(CodeGenerator, Cast) {
1104  auto lhs = p->get_lhs();
1105  auto *dest_type = to_llvm_type(p->get_type());
1106 
1107  Value *val = cached_visit(lhs);
1108  TAN_ASSERT(val);
1109 
1110  Value *ret = nullptr;
1111  val = convert_llvm_type_to(lhs, p->get_type()); // lvalue will be loaded here
1112  if (lhs->is_lvalue()) {
1113  ret = create_block_alloca(_builder->GetInsertBlock(), dest_type, 1, "casted");
1114  _builder->CreateStore(val, ret);
1115  } else {
1116  ret = val;
1117  }
1118 
1119  _llvm_value_cache[p] = ret;
1120 }
1121 
1122 DEFINE_AST_VISITOR_IMPL(CodeGenerator, Assignment) {
1123  /// codegen the lhs and rhs
1124  auto *lhs = pcast<Expr>(p->get_lhs());
1125  auto *rhs = p->get_rhs();
1126 
1127  // type of lhs is the same as type of the assignment
1128  if (!lhs->is_lvalue()) {
1129  error(ErrorType::SEMANTIC_ERROR, lhs, "Value can only be assigned to an lvalue");
1130  }
1131 
1132  Value *from = cached_visit(rhs);
1133  from = load_if_is_lvalue(rhs);
1134  Value *to = cached_visit(lhs);
1135  TAN_ASSERT(from && to);
1136 
1137  _builder->CreateStore(from, to);
1138 
1139  _llvm_value_cache[p] = to;
1140 }
1141 
1142 DEFINE_AST_VISITOR_IMPL(CodeGenerator, FunctionCall) {
1143  FunctionDecl *callee = p->_callee;
1144  auto *callee_type = pcast<tanlang::FunctionType>(callee->get_type());
1145  size_t n = callee->get_n_args();
1146 
1147  // args
1148  vector<Value *> arg_vals;
1149  for (size_t i = 0; i < n; ++i) {
1150  auto actual_arg = p->_args[i];
1151  auto *a = cached_visit(actual_arg);
1152  if (!a) {
1153  error(ErrorType::SEMANTIC_ERROR, actual_arg, "Invalid function call argument");
1154  }
1155 
1156  // implicit cast
1157  auto expected_ty = callee_type->get_arg_types()[i];
1158  a = convert_llvm_type_to(actual_arg, expected_ty);
1159  arg_vals.push_back(a);
1160  }
1161 
1162  auto *func_type = (llvm::FunctionType *)to_llvm_type(callee->get_type());
1163  auto *F = cached_visit(callee);
1164 
1165  _llvm_value_cache[p] = _builder->CreateCall(func_type, F, arg_vals);
1166 }
1167 
1168 DEFINE_AST_VISITOR_IMPL(CodeGenerator, FunctionDecl) {
1169  auto *func_type = pcast<tanlang::FunctionType>(p->get_type());
1170 
1171  auto ret_ty = func_type->get_return_type();
1172  Metadata *ret_meta = to_llvm_metadata(ret_ty, get_or_create_di_file(p), AST_LINENO(p));
1173 
1174  /// get function name
1175  str func_name = p->get_name();
1176  /// rename to "tan_main", as it will be called by the real main function in runtime/main.cpp
1177  if (func_name == "main") {
1178  p->set_name(func_name = "tan_main");
1179  p->set_public(true);
1180  }
1181 
1182  /// generate prototype
1183  auto *F = (Function *)codegen_func_prototype(p);
1184 
1185  /// set function arg types
1186  vector<Metadata *> arg_metas;
1187  for (size_t i = 0; i < p->get_n_args(); ++i) {
1188  auto ty = func_type->get_arg_types()[i];
1189  arg_metas.push_back(to_llvm_metadata(ty, get_or_create_di_file(p), AST_LINENO(p)));
1190  }
1191 
1192  /// function implementation
1193  if (!p->is_external()) {
1194  /// create a new basic block to start insertion into
1195  BasicBlock *main_block = BasicBlock::Create(*_llvm_ctx, "func_entry", F);
1196  _builder->SetInsertPoint(main_block);
1197 
1198  /// debug information
1199  DIScope *di_scope = get_current_di_scope();
1200  auto *di_func_t = create_function_debug_info_type(ret_meta, arg_metas);
1201  DISubprogram *subprogram =
1202  _di_builder->createFunction(di_scope, func_name, func_name, get_or_create_di_file(p), AST_LINENO(p), di_func_t,
1203  p->src()->get_col(p->start()), DINode::FlagPrototyped,
1204  DISubprogram::SPFlagDefinition, nullptr, nullptr, nullptr);
1205  F->setSubprogram(subprogram);
1206  push_di_scope(subprogram);
1207 
1208  /// add_ctx all function arguments to scope
1209  size_t i = 0;
1210  for (auto &a : F->args()) {
1211  auto arg_name = p->get_arg_name(i);
1212  auto *arg_val = cached_visit(p->get_arg_decls()[i]);
1213  _builder->CreateStore(&a, arg_val);
1214 
1215  /// create a debug descriptor for the arguments
1216  auto *arg_meta = to_llvm_metadata(func_type->get_arg_types()[i], get_or_create_di_file(p), AST_LINENO(p));
1217  llvm::DILocalVariable *di_arg = _di_builder->createParameterVariable(
1218  subprogram, arg_name, (unsigned)i + 1, get_or_create_di_file(p), AST_LINENO(p), (DIType *)arg_meta, true);
1219  _di_builder->insertDeclare(arg_val, di_arg, _di_builder->createExpression(),
1220  debug_loc_of_node(p->get_arg_decls()[i], subprogram), _builder->GetInsertBlock());
1221  ++i;
1222  }
1223 
1224  /// generate function body
1225  cached_visit(p->get_body());
1226 
1227  /// create a return instruction if there is none, the return value is the default value of the return type
1228  auto *trailing_block = _builder->GetInsertBlock();
1229  if (trailing_block->sizeWithoutDebug() == 0 || !trailing_block->back().isTerminator()) {
1230  if (ret_ty->is_void()) {
1231  _builder->CreateRetVoid();
1232  } else {
1233  auto *ret_val = codegen_type_default_value(ret_ty);
1234  TAN_ASSERT(ret_val);
1235  _builder->CreateRet(ret_val);
1236  }
1237  }
1238  pop_di_scope();
1239  }
1240 
1241  _llvm_value_cache[p] = F;
1242 }
1243 
1244 DEFINE_AST_VISITOR_IMPL(CodeGenerator, Import) {
1245  for (FunctionDecl *f : p->_imported_funcs) {
1246  /// do nothing for already defined intrinsics
1247  auto *func = _module->getFunction(f->get_name());
1248  if (func) {
1249  _llvm_value_cache[f] = func;
1250  }
1251  _llvm_value_cache[f] = codegen_func_prototype(f);
1252  }
1253 
1254  _llvm_value_cache[p] = nullptr;
1255 }
1256 
1257 DEFINE_AST_VISITOR_IMPL(CodeGenerator, Intrinsic) {
1258  Value *ret = nullptr;
1259  switch (p->get_intrinsic_type()) {
1260  /// trivial codegen
1261  case IntrinsicType::GET_DECL:
1262  case IntrinsicType::LINENO:
1263  case IntrinsicType::ABORT:
1264  case IntrinsicType::STACK_TRACE:
1265  case IntrinsicType::FILENAME: {
1266  ret = cached_visit(p->get_sub());
1267  break;
1268  }
1269  case IntrinsicType::NOOP:
1270  default:
1271  break;
1272  }
1273 
1274  _llvm_value_cache[p] = ret;
1275 }
1276 
1277 DEFINE_AST_VISITOR_IMPL(CodeGenerator, ArrayLiteral) { _llvm_value_cache[p] = codegen_literals(p); }
1278 DEFINE_AST_VISITOR_IMPL(CodeGenerator, CharLiteral) { _llvm_value_cache[p] = codegen_literals(p); }
1279 DEFINE_AST_VISITOR_IMPL(CodeGenerator, BoolLiteral) { _llvm_value_cache[p] = codegen_literals(p); }
1280 DEFINE_AST_VISITOR_IMPL(CodeGenerator, IntegerLiteral) { _llvm_value_cache[p] = codegen_literals(p); }
1281 DEFINE_AST_VISITOR_IMPL(CodeGenerator, FloatLiteral) { _llvm_value_cache[p] = codegen_literals(p); }
1282 DEFINE_AST_VISITOR_IMPL(CodeGenerator, StringLiteral) { _llvm_value_cache[p] = codegen_literals(p); }
1283 DEFINE_AST_VISITOR_IMPL(CodeGenerator, NullPointerLiteral) { _llvm_value_cache[p] = codegen_literals(p); }
1284 
1285 DEFINE_AST_VISITOR_IMPL(CodeGenerator, StructDecl) {
1286  // don't do anything
1287 }
1288 
1289 DEFINE_AST_VISITOR_IMPL(CodeGenerator, Loop) {
1290  /*
1291  * Results should like this:
1292  *
1293  * ...
1294  * goto init
1295  *
1296  * init:
1297  * perform initialization // optional
1298  * goto predicate
1299  *
1300  * predicate:
1301  * check condition
1302  * goto 'loop_body' or 'loop_end'
1303  *
1304  * loop_body:
1305  * ...
1306  * goto iter
1307  *
1308  * iter:
1309  * run iteration statement // optional
1310  * goto 'predicate'
1311  *
1312  * loop_end:
1313  * ...
1314  *
1315  **/
1316 
1317  Function *func = _builder->GetInsertBlock()->getParent();
1318 
1319  // Create needed blocks in advance
1320  auto *init_block = BasicBlock::Create(*_llvm_ctx, "init", func);
1321  auto *predicate_block = BasicBlock::Create(*_llvm_ctx, "predicate", func);
1322  BasicBlock *body_block = BasicBlock::Create(*_llvm_ctx, "loop_body", func);
1323  auto *iter_block = BasicBlock::Create(*_llvm_ctx, "iter", func);
1324  auto *end_block = BasicBlock::Create(*_llvm_ctx, "loop_end", func);
1325 
1326  // Make sure to set _loop_start and _loop_end before generating loop_body,
1327  // cuz `break` and `continue` need them to work
1328  p->_loop_start = predicate_block;
1329  p->_loop_end = end_block;
1330 
1331  // Create a br instruction if there is no terminator instruction at the end of current block
1332  // cuz LLVM expects a terminator at the end of every block
1333  if (!_builder->GetInsertBlock()->back().isTerminator()) {
1334  _builder->CreateBr(init_block);
1335  }
1336 
1337  // 1. Initialization
1338  _builder->SetInsertPoint(init_block);
1339  if (p->_loop_type == ASTLoopType::FOR) {
1340  cached_visit(p->_initialization);
1341  }
1342  _builder->CreateBr(predicate_block);
1343 
1344  // 2. Predicate
1345  _builder->SetInsertPoint(predicate_block);
1346  auto *cond = cached_visit(p->_predicate);
1347  _builder->CreateCondBr(cond, body_block, end_block);
1348 
1349  // 3. Body
1350  _builder->SetInsertPoint(body_block);
1351  cached_visit(p->_body);
1352  // go to iteration block, unless there's a return, continue, or break statement
1353  if (!_builder->GetInsertBlock()->back().isTerminator()) {
1354  _builder->CreateBr(iter_block);
1355  }
1356 
1357  // 4. Iteration statement
1358  _builder->SetInsertPoint(iter_block);
1359  if (p->_loop_type == ASTLoopType::FOR) {
1360  cached_visit(p->_iteration);
1361  }
1362  // go back to loop start
1363  _builder->CreateBr(predicate_block);
1364 
1365  // 5. Prepare subsequence code generation
1366  _builder->SetInsertPoint(end_block);
1367 
1368  _llvm_value_cache[p] = nullptr;
1369 }
1370 
1371 DEFINE_AST_VISITOR_IMPL(CodeGenerator, BreakContinue) {
1372  auto loop = p->get_parent_loop();
1373  TAN_ASSERT(loop);
1374 
1375  auto s = loop->_loop_start;
1376  auto e = loop->_loop_end;
1377  TAN_ASSERT(s);
1378  TAN_ASSERT(e);
1379 
1380  if (p->get_node_type() == ASTNodeType::BREAK) {
1381  _builder->CreateBr(e);
1382  } else if (p->get_node_type() == ASTNodeType::CONTINUE) {
1383  _builder->CreateBr(s);
1384  } else {
1385  TAN_ASSERT(false);
1386  }
1387 
1388  _llvm_value_cache[p] = nullptr;
1389 }
1390 
1391 DEFINE_AST_VISITOR_IMPL(CodeGenerator, VarRef) { _llvm_value_cache[p] = cached_visit(p->get_referred()); }
1392 
1393 DEFINE_AST_VISITOR_IMPL(CodeGenerator, PackageDecl) {}
1394 
1395 void CodeGenerator::error(ErrorType type, ASTBase *p, const str &message) {
1396  Error(type, p->src()->get_token(p->start()), p->src()->get_token(p->end()), message).raise();
1397 }
1398 
1399 } // namespace tanlang