tan  0.0.1
driver.cpp
1 #include "driver/driver.h"
2 #include "lexer/lexer.h"
3 #include "source_file/token.h"
4 #include "analysis/type_check.h"
5 #include "analysis/register_declarations.h"
6 #include "analysis/organize_packages.h"
7 #include "analysis/type_precheck.h"
8 #include "analysis/scan_imports.h"
9 #include "codegen/code_generator.h"
10 #include "include/ast/package.h"
11 #include "ast/intrinsic.h"
12 #include "ast/stmt.h"
13 #include "ast/package.h"
14 #include "source_file/source_file.h"
15 #include "parser/parser.h"
16 #include "llvm_api/clang_frontend.h"
17 #include "linker/linker.h"
18 #include "llvm_api/llvm_ar.h"
19 #include <filesystem>
20 
21 #include "llvm/ADT/StringRef.h"
22 #include <llvm/Support/TargetSelect.h>
23 #include <llvm/MC/TargetRegistry.h>
24 #include <llvm/Target/TargetOptions.h>
25 #include <llvm/Support/FileSystem.h>
26 #include <llvm/IR/LegacyPassManager.h>
27 #include <llvm/Transforms/InstCombine/InstCombine.h>
28 #include <llvm/Transforms/IPO/PassManagerBuilder.h>
29 #include <llvm/Transforms/Scalar.h>
30 #include <llvm/Transforms/Scalar/GVN.h>
31 #include <llvm/Support/CodeGen.h>
32 #include <clang/Basic/TargetOptions.h>
33 #include <clang/Frontend/CompilerInvocation.h>
34 #include <llvm/LinkAllPasses.h>
35 #include <llvm/Option/OptTable.h>
36 #include <llvm/Support/ManagedStatic.h>
37 #include <llvm/Analysis/TargetTransformInfo.h>
38 #include <clang/Basic/Diagnostic.h>
39 #include <clang/Basic/DiagnosticOptions.h>
40 #include <llvm/MC/MCAsmBackend.h>
41 #include <llvm/MC/MCAsmInfo.h>
42 #include <llvm/MC/MCCodeEmitter.h>
43 #include <llvm/MC/MCContext.h>
44 #include <llvm/MC/MCInstrInfo.h>
45 #include <llvm/MC/MCObjectWriter.h>
46 #include <llvm/MC/MCParser/MCAsmParser.h>
47 #include <llvm/MC/MCParser/MCTargetAsmParser.h>
48 #include <llvm/MC/MCRegisterInfo.h>
49 #include <llvm/MC/MCSectionMachO.h>
50 #include <llvm/MC/MCStreamer.h>
51 #include <llvm/MC/MCSubtargetInfo.h>
52 #include <llvm/Support/Host.h>
53 #include <llvm/Support/MemoryBuffer.h>
54 #include <llvm/Support/SourceMgr.h>
55 #include <llvm/Support/Regex.h>
56 #include <llvm/Support/StringSaver.h>
57 #include <llvm/Object/Archive.h>
58 #include <llvm/Object/IRObjectFile.h>
59 #include <llvm/Support/ConvertUTF.h>
60 #include <llvm/Support/FormatVariadic.h>
61 #include <llvm/Support/LineIterator.h>
62 #include <lld/Common/Driver.h>
63 #include <llvm/Analysis/LoopAnalysisManager.h>
64 #include <llvm/Analysis/CGSCCPassManager.h>
65 #include <llvm/Passes/PassBuilder.h>
66 
67 using namespace tanlang;
68 namespace fs = std::filesystem;
69 
/// File extensions that mark an input as a C/C++ source to be handed to the clang frontend.
/// \see https://gcc.gnu.org/onlinedocs/gcc-4.4.1/gcc/Overall-Options.html
static constexpr std::array CXX_EXTS{
    ".cpp", ".CPP", ".cxx", ".c",   ".cc",  ".C",   ".c++", ".cp",  ".i",  ".ii",
    ".h",   ".hh",  ".H",   ".hp",  ".hxx", ".hpp", ".HPP", ".h++", ".tcc"};
90 static constexpr str_view TAN_EXT = ".tan";
91 
92 static umap<TanOptLevel, llvm::CodeGenOpt::Level> tan_to_llvm_opt_level{
93  {O0, llvm::CodeGenOpt::None },
94  {O1, llvm::CodeGenOpt::Less },
95  {O2, llvm::CodeGenOpt::Default },
96  {O3, llvm::CodeGenOpt::Aggressive},
97 };
98 
99 void verify_dirs(const vector<str> &dirs);
100 
101 /**
102  * \brief Compile CXX files using clang frontend and return a list of object files
103  */
104 vector<str> compile_cxx(const vector<str> &files, TanCompilation config);
105 
106 static str search_library(const vector<str> &lib_dirs, const str &lib_name);
107 
108 CompilerDriver::~CompilerDriver() { singleton = nullptr; }
109 
110 CompilerDriver::CompilerDriver(TanCompilation config) {
111  // Verify config
112  verify_dirs(config.lib_dirs);
113  verify_dirs(config.import_dirs);
114  _config = config;
115 
116  // Register import dirs
117  size_t n_import = _config.import_dirs.size();
118  CompilerDriver::import_dirs.reserve(n_import);
120  CompilerDriver::import_dirs.begin(), _config.import_dirs.begin(), _config.import_dirs.end());
121 
122  // Initialize LLVM
123  llvm::InitializeAllTargetInfos();
124  llvm::InitializeAllTargets();
125  llvm::InitializeAllTargetMCs();
126  llvm::InitializeAllAsmParsers();
127  llvm::InitializeAllAsmPrinters();
128  auto target_triple = llvm::sys::getDefaultTargetTriple();
129  str error;
130  auto target = llvm::TargetRegistry::lookupTarget(target_triple, error);
131  if (!target) {
132  Error err(error);
133  err.raise();
134  }
135 
136  auto CPU = "generic";
137  auto features = "";
138  llvm::TargetOptions opt;
139  /// relocation model
140  auto RM = llvm::Reloc::Model::PIC_;
141  _target_machine = target->createTargetMachine(target_triple, CPU, features, opt, RM);
142 
143  singleton = this;
144 }
145 
146 void CompilerDriver::run(const vector<str> &files) {
147  // Check if files exist
148  // and separate cxx and tan source files based on their file extensions
149  vector<str> tan_files{};
150  vector<str> cxx_files{};
151  for (size_t i = 0; i < files.size(); ++i) {
152  fs::path f = fs::path(files[i]);
153  str ext = f.extension().string();
154 
155  if (!fs::exists(f))
156  Error(ErrorType::FILE_NOT_FOUND, fmt::format("File not found: {}", files[i])).raise();
157 
158  bool is_cxx = std::any_of(CXX_EXTS.begin(), CXX_EXTS.end(), [=](const str &e) { return e == ext; });
159  if (is_cxx) {
160  cxx_files.push_back(files[i]);
161  } else if (ext == TAN_EXT) {
162  tan_files.push_back(files[i]);
163  } else {
164  Error(ErrorType::GENERIC_ERROR, fmt::format("Unrecognized source file: {}", files[i])).raise();
165  }
166  }
167 
168  // Compiling
169  auto cxx_objs = compile_cxx(cxx_files, _config);
170  auto tan_objs = compile_tan(tan_files);
171 
172  // Linking
173  vector<str> obj_files(cxx_objs.size() + tan_objs.size());
174  size_t i = 0;
175  for (const str &o : cxx_objs)
176  obj_files[i++] = o;
177  for (const str &o : tan_objs)
178  obj_files[i++] = o;
179 
180  link(obj_files);
181 }
182 
184  auto q = _packages.find(name);
185  if (q != _packages.end()) {
186  return q->second;
187  }
188  return nullptr;
189 }
190 
191 void CompilerDriver::register_package(const str &name, Package *package) { _packages[name] = package; }
192 
193 vector<Package *> CompilerDriver::stage1_analysis(vector<Program *> programs) {
194  TAN_ASSERT(!programs.empty());
195 
196  // Register all declarations in their local contexts
197  for (auto *p : programs) {
199  rd.run(p);
200  }
201 
202  // Organize input files into packages
203  OrganizePackages op;
204  vector<Package *> ps = op.run(programs);
205 
206  // Skip packages that are already processed, and check for cyclic dependencies
207  vector<Package *> packages{};
208  for (Package *p : ps) {
209  AnalyzeStatus status = _package_status[p->get_name()];
210  if (status == AnalyzeStatus::None) {
211  packages.push_back(p);
212  } else if (status == AnalyzeStatus::Processing) {
213  // TODO: better error message
214  Error(ErrorType::IMPORT_ERROR, "Cyclic package dependency detected for package: " + p->get_name()).raise();
215  }
216  }
217 
218  // Register packages we found BEFORE running semantic analysis,
219  // so that we can search for them during analysis
220  for (auto *p : packages) {
221  register_package(p->get_name(), p);
222  }
223 
224  // Scan package imports and find the source files needed
225  for (auto *p : packages) {
226  _package_status[p->get_name()] = AnalyzeStatus::Processing;
227 
228  uset<str> import_files{};
229  uset<str> import_names{};
230 
231  ScanImports si;
232  auto res = si.run(p);
233  for (const auto &e : res) {
234  import_names.insert(e.first);
235  import_files.insert(e.second.begin(), e.second.end());
236  }
237 
238  // Analyze imported files and store results
239  if (!import_files.empty()) {
240  vector<Package *> import_packages = stage1_analysis(parse(vector<str>(import_files.begin(), import_files.end())));
241  for (Package *ip : import_packages) {
242  if (import_names.contains(ip->get_name())) { // import_packages might have some unrelated packages
243  register_package(ip->get_name(), ip);
244  }
245  }
246  }
247 
248  _package_status[p->get_name()] = AnalyzeStatus::Done;
249  }
250 
251  // Partial type checking
252  for (auto *p : packages) {
253  TypePrecheck tp;
254  tp.run(p);
255  }
256 
257  return packages;
258 }
259 
260 vector<str> CompilerDriver::compile_tan(const vector<str> &files) {
261  bool print_ir_code = _config.verbose >= 1;
262  size_t n_files = files.size();
263  vector<str> ret(n_files);
264 
265  // Parse
266  auto programs = parse(files);
267 
268  // (Optional): Print AST tree
269  if (_config.verbose >= 2) {
270  for (auto *p : programs) {
271  std::cout << fmt::format("AST Tree of {}:\n{}", p->src()->get_filename(), p->repr());
272  }
273  }
274 
275  vector<Package *> packages = stage1_analysis(programs);
276 
277  // Full semantic analysis
278  // We still need to perform a full analysis on imported packages, because we might implicitly depend on some private
279  // symbols in the imported packages
280  for (auto [name, p] : _packages) {
281  TypeCheck analyzer;
282  analyzer.run(p);
283  }
284 
285  // Code generation
286  size_t i = 0;
287  for (auto *p : packages) {
288  std::cout << fmt::format("Compiling TAN package: {}\n", p->get_name());
289 
290  // IR
291  _target_machine->setOptLevel(tan_to_llvm_opt_level[_config.opt_level]);
292  auto *cg = new CodeGenerator(_target_machine);
293  cg->run(p);
294 
295  if (print_ir_code)
296  cg->dump_ir();
297 
298  // object file
299  str ofile = ret[i] = fs::path(p->get_name() + ".o").filename().string();
300  cg->emit_to_file(ofile);
301 
302  ++i;
303 
304  delete cg;
305  }
306 
307  for (auto *p : programs) {
308  delete p;
309  }
310  return ret;
311 }
312 
313 vector<Program *> CompilerDriver::parse(const vector<str> &files) {
314  TAN_ASSERT(!files.empty());
315 
316  vector<Program *> ret{};
317 
318  for (const str &file : files) {
319  SourceFile *source = new SourceFile();
320  source->open(file);
321 
322  // tokenization
323  auto tokens = tokenize(source);
324 
325  auto *sm = new TokenizedSourceFile(file, tokens);
326  auto *parser = new Parser(sm);
327  auto *ast = parser->parse();
328 
329  // register top-level declarations
330  // TODO: put intrinsics into a dedicated module
331  auto intrinsic_funcs = Intrinsic::GetIntrinsicFunctionDeclarations();
332  for (auto *f : intrinsic_funcs) {
333  ast->ctx()->set_function_decl(f);
334  }
335 
336  ret.push_back(ast);
337  }
338 
339  return ret;
340 }
341 
342 vector<str> CompilerDriver::resolve_package_import(const str &callee_path, const str &import_name) {
343  auto import_path = fs::path(import_name);
344 
345  // importing using an absolute path
346  if (import_path.is_absolute() && fs::exists(import_path)) {
347  return {import_path.string()}; // no reason to continue
348  }
349 
350  vector<str> ret{};
351 
352  // search relative to callee's path
353  {
354  auto p = fs::path(callee_path).parent_path() / import_path;
355  p = p.lexically_normal();
356  if (fs::exists(p) || fs::exists(p.replace_extension(".tan"))) {
357  ret.push_back(fs::absolute(p).string());
358  }
359  }
360 
361  // user-defined include dirs
362  for (const auto &rel : CompilerDriver::import_dirs) {
363  auto p = fs::path(rel) / import_path;
364  p = p.lexically_normal();
365  if (fs::exists(p) || fs::exists(p.replace_extension(".tan"))) {
366  ret.push_back(fs::absolute(p).string());
367  }
368  }
369 
370  // TODO: system directories
371  return ret;
372 }
373 
374 void CompilerDriver::link(const std::vector<str> &files) {
375  if (_config.type == SLIB) { // static
376  // also add files specified by -l option
377  vector<str> all_files(files.begin(), files.end());
378  for (const auto &lib : _config.link_files) {
379  str path = search_library(_config.lib_dirs, lib);
380 
381  if (path.empty())
382  Error(ErrorType::LINK_ERROR, fmt::format("Unable to find library: {}", lib)).raise();
383 
384  all_files.push_back(path);
385  }
386 
387  llvm_ar_create_static_lib(_config.out_file, all_files);
388  return;
389  }
390 
391  // shared, obj, or exe
392  using tanlang::Linker;
393  Linker linker;
394  linker.add_files(files);
395  linker.add_flag("-o" + str(_config.out_file));
396  if (_config.type == EXE) {
397  linker.add_flags({"-fPIE"});
398  } else if (_config.type == DLIB) {
399  linker.add_flags({"-shared"});
400  }
401 
402  // -L
403  size_t n_lib_dirs = _config.lib_dirs.size();
404  for (size_t i = 0; i < n_lib_dirs; ++i) {
405  auto p = fs::absolute(fs::path(_config.lib_dirs[i]));
406  linker.add_flag("-L" + p.string());
407  linker.add_flag("-Wl,-rpath," + p.string());
408  }
409 
410  // -l
411  size_t n_link_files = _config.link_files.size();
412  for (size_t i = 0; i < n_link_files; ++i) {
413  linker.add_flag("-l" + std::string(_config.link_files[i]));
414  }
415  linker.add_flag(opt_level_to_string(_config.opt_level));
416 
417  if (!linker.link())
418  Error(ErrorType::LINK_ERROR, "Failed linking").raise();
419 }
420 
421 /**
422  * \section Helpers
423  */
424 
425 vector<str> compile_cxx(const vector<str> &files, TanCompilation config) {
426  vector<str> obj_files{};
427 
428  if (!files.empty()) {
429  std::cout << "Compiling " << files.size() << " CXX file(s): ";
430  std::for_each(files.begin(), files.end(), [=](auto f) { std::cout << f << " "; });
431  std::cout << "\n";
432 
433  auto err_code = clang_compile(files, &config);
434  if (err_code)
435  Error(ErrorType::GENERIC_ERROR, "Failed to compile CXX files").raise();
436 
437  // object file paths
438  size_t n = files.size();
439  obj_files.reserve(n);
440  for (size_t i = 0; i < n; ++i) {
441  auto p = fs::path(str(files[i])).replace_extension(".o").filename();
442  obj_files.push_back(p.string());
443  }
444  }
445 
446  return obj_files;
447 }
448 
449 void verify_dirs(const vector<str> &dirs) {
450  for (size_t i = 0; i < dirs.size(); ++i) {
451  fs::path p = fs::path(dirs[i]);
452 
453  if (!fs::exists(p))
454  Error(ErrorType::FILE_NOT_FOUND, fmt::format("File not found: {}", dirs[i])).raise();
455 
456  if (!fs::is_directory(p))
457  Error(ErrorType::FILE_NOT_FOUND, fmt::format("Not a directory: {}", dirs[i])).raise();
458  }
459 }
460 
461 str search_library(const std::vector<str> &lib_dirs, const str &lib_name) {
462  // TODO: platform specific extensions
463  for (const str &dir : lib_dirs) {
464  vector<fs::path> candidates = {
465  /// possible filenames
466  fs::path(dir) / fs::path(lib_name), //
467  fs::path(dir) / fs::path(lib_name + ".a"), //
468  fs::path(dir) / fs::path(lib_name + ".so"), //
469  fs::path(dir) / fs::path("lib" + lib_name + ".a"), //
470  fs::path(dir) / fs::path("lib" + lib_name + ".so"), //
471  };
472 
473  for (const auto &p : candidates) {
474  if (fs::exists(p)) {
475  return p.string();
476  }
477  }
478  }
479 
480  return "";
481 }
vector< Package * > stage1_analysis(vector< Program * > ps)
Get a set of partially analyzed packages that can be used for cross-package dependency analysis....
Definition: driver.cpp:193
Package * get_package(const str &name)
Get a pointer to a Package. Semantic analysis is not guaranteed to be fully performed on it.
Definition: driver.cpp:183
static vector< str > import_dirs
Import search directories FIXME: static variable?
Definition: driver.h:34
void run(const vector< str > &files)
Compile CXX or TAN source files and link their output object files.
Definition: driver.cpp:146
vector< Program * > parse(const vector< str > &files)
Parse the corresponding source file, and build AST.
Definition: driver.cpp:313
static vector< str > resolve_package_import(const str &callee_path, const str &import_name)
Get a list of possible files that corresponds to an import. Check PACKAGES.md.
Definition: driver.cpp:342
void register_package(const str &name, Package *package)
Register a Package that has been spotted from source files, with top-level context stored inside.
Definition: driver.cpp:191
static vector< FunctionDecl * > GetIntrinsicFunctionDeclarations()
Generate a list of intrinsics function prototypes/declarations, such as @abort
Definition: intrinsic.cpp:35
Organize a list of source files into their corresponding packages according to the code....
Top Down Operator Precedence Parsing.
Definition: parser.h:16
Register all declarations (including local) in the corresponding scopes. Run this stage early to easi...
Scans all dependencies in a package, and return their names and paths to relevant source files.
Definition: scan_imports.h:17
Different from SourceFile, TokenizedSourceFile manages the tokenized text of a source file.
Perform preliminary type checking. We try our best to resolve types, and remember those that cannot b...
Definition: type_precheck.h:23
Compilation configuration.
Definition: tan.h:42
vector< str > import_dirs
Import search directories.
Definition: tan.h:49
unsigned verbose
Verbose level, 0 non-verbose, 1 print LLVM IR, 2 print LLVM IR and abstract syntax tree.
Definition: tan.h:45
str out_file
Output filename, invalid if TanCompilation::type is set to OBJ.
Definition: tan.h:46
vector< str > link_files
Files to link against.
Definition: tan.h:47
TanOptLevel opt_level
Optimization level.
Definition: tan.h:44
vector< str > lib_dirs
Library search paths.
Definition: tan.h:48