From b1cca144331672e5d610ef5f5c6f587ecbc39b8f Mon Sep 17 00:00:00 2001 From: Sameer Rahmani Date: Tue, 8 Aug 2023 22:32:29 +0100 Subject: [PATCH] Move over namespace, ast, and the reader --- CMakeLists.txt | 2 + serene/include/serene/config.h.in | 27 ++ serene/src/CMakeLists.txt | 4 + serene/src/ast.cpp | 167 ++++++++++++ serene/src/ast.h | 254 ++++++++++++++++++ serene/src/environment.h | 80 ++++++ serene/src/jit/jit.h | 11 + serene/src/location.h | 88 ++++++ serene/src/namespace.cpp | 223 ++++++++++++++++ serene/src/namespace.h | 140 ++++++++++ serene/src/options.h | 2 +- serene/src/reader.cpp | 431 ++++++++++++++++++++++++++++++ serene/src/reader.h | 112 ++++++++ serene/src/source_mgr.cpp | 225 ++++++++++++++++ serene/src/source_mgr.h | 190 +++++++++++++ serene/src/types.h | 86 ++++++ serene/src/utils.h | 138 ++++++++++ 17 files changed, 2179 insertions(+), 1 deletion(-) create mode 100644 serene/src/ast.cpp create mode 100644 serene/src/ast.h create mode 100644 serene/src/environment.h create mode 100644 serene/src/location.h create mode 100644 serene/src/namespace.cpp create mode 100644 serene/src/namespace.h create mode 100644 serene/src/reader.cpp create mode 100644 serene/src/reader.h create mode 100644 serene/src/source_mgr.cpp create mode 100644 serene/src/source_mgr.h create mode 100644 serene/src/types.h create mode 100644 serene/src/utils.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 267da7b..483c1c7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,8 @@ option(LLVM_USE_PERF "If the target LLVM build is built with LLVM_USE_PERF" OFF) # Only do these if this is the main project, and not if it is included through add_subdirectory if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME) ## Settings ======================= + + set(CMAKE_C_STANDARD 17) # specify the C++ standard if (CPP_20_SUPPORT) set(CMAKE_CXX_STANDARD 20) diff --git a/serene/include/serene/config.h.in b/serene/include/serene/config.h.in index 151ca18..2feb0c4 100644 --- 
a/serene/include/serene/config.h.in +++ b/serene/include/serene/config.h.in @@ -24,4 +24,31 @@ // Should we build the support for MLIR CL OPTIONS? #cmakedefine SERENE_WITH_MLIR_CL_OPTION +#ifdef __cplusplus +enum class TypeID { +#else +typedef enum { +#endif + NIL = 0, + SYMBOL, + TYPE, + FN, + NUMBER, + INT, + CSTRING, + STRING, + KEYWORD, + NAMESPACE, + LIST, + MAP, + VECTOR, + STRUCT, + PROTOCOL, + Error, +} +#ifndef __cplusplus +TypeID +#endif +; + #endif diff --git a/serene/src/CMakeLists.txt b/serene/src/CMakeLists.txt index 8f0c3f1..c544c98 100644 --- a/serene/src/CMakeLists.txt +++ b/serene/src/CMakeLists.txt @@ -19,4 +19,8 @@ target_sources(serene PRIVATE commands/commands.cpp jit/jit.cpp + + ast.cpp + namespace.cpp + ) diff --git a/serene/src/ast.cpp b/serene/src/ast.cpp new file mode 100644 index 0000000..d445b26 --- /dev/null +++ b/serene/src/ast.cpp @@ -0,0 +1,167 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include "ast.h" + +#include + +namespace serene::ast { + +// ============================================================================ +// Symbol +// ============================================================================ +Symbol::Symbol(const LocationRange &loc, llvm::StringRef name, + llvm::StringRef currentNS) + : Expression(loc) { + // IMPORTANT NOTE: the `name` and `currentNS` should be valid string and + // already validated. + auto partDelimiter = name.find('/'); + if (partDelimiter == std::string::npos) { + nsName = currentNS.str(); + this->name = name.str(); + + } else { + this->name = name.substr(partDelimiter + 1, name.size()).str(); + nsName = name.substr(0, partDelimiter).str(); + } +}; + +Symbol::Symbol(Symbol &s) : Expression(s.location) { + this->name = s.name; + this->nsName = s.nsName; +}; + +TypeID Symbol::getType() const { return TypeID::SYMBOL; }; + +std::string Symbol::toString() const { + return llvm::formatv("", nsName, name); +} + +bool Symbol::classof(const Expression *e) { + return e->getType() == TypeID::SYMBOL; +}; + +// ============================================================================ +// Number +// ============================================================================ +Number::Number(const LocationRange &loc, const long &num) + : Expression(loc), value(num), isNeg(num < 0), isFloat(false){}; + +Number::Number(const LocationRange &loc, const double &num) + : Expression(loc), value(num), isNeg(num < 0), isFloat(true){}; + +Number::Number(Number &n) : Expression(n.location), value(n.value), isNeg(n.isNeg), isFloat(n.isFloat){}; // copy the flags as well: toString() branches on isFloat + +TypeID Number::getType() const { return TypeID::NUMBER; }; + +std::string Number::toString() const { + if (isFloat) { + return llvm::formatv("", std::get(value)); + } + return llvm::formatv("", std::get(value)); +} + +bool Number::classof(const Expression *e) { + return e->getType() == TypeID::NUMBER; +}; + +// ============================================================================ +// List +// 
============================================================================ +List::List(const LocationRange &loc, Ast &v) : Expression(loc) { + this->elements.swap(v); + v.clear(); +}; + +TypeID List::getType() const { return TypeID::LIST; }; + +std::string List::toString() const { + std::string s{this->elements.empty() ? "-" : ""}; + + for (const auto &n : this->elements) { + s = llvm::formatv("{0}, {1}", s, n->toString()); + } + + return llvm::formatv("", s); +} + +bool List::classof(const Expression *e) { + return e->getType() == TypeID::LIST; +}; + +// ============================================================================ +// String +// ============================================================================ +String::String(const LocationRange &loc, llvm::StringRef v) + : Expression(loc), data(v.str()){}; + +String::String(String &s) : Expression(s.location), data(s.data){}; + +TypeID String::getType() const { return TypeID::STRING; }; + +std::string String::toString() const { + const short truncateSize = 10; + return llvm::formatv( + "", + data.substr(0, data.size() >= truncateSize ? 
truncateSize : data.size())); +} + +bool String::classof(const Expression *e) { + return e->getType() == TypeID::STRING; +}; +// ============================================================================ +// Keyword +// ============================================================================ +Keyword::Keyword(const LocationRange &loc, llvm::StringRef name) + : Expression(loc), name(name.str()){}; + +Keyword::Keyword(Keyword &s) : Expression(s.location) { this->name = s.name; }; + +TypeID Keyword::getType() const { return TypeID::KEYWORD; }; + +std::string Keyword::toString() const { + return llvm::formatv("", name); +} + +bool Keyword::classof(const Expression *e) { + return e->getType() == TypeID::KEYWORD; +}; + +// ============================================================================ +// Error +// ============================================================================ +Error::Error(const LocationRange &loc, std::unique_ptr tag, + llvm::StringRef msg) + : Expression(loc), msg(msg.str()), tag(std::move(tag)){}; + +Error::Error(Error &e) : Expression(e.location) { + this->msg = e.msg; + this->tag = std::move(e.tag); +}; + +TypeID Error::getType() const { return TypeID::KEYWORD; }; + +std::string Error::toString() const { + return llvm::formatv("", msg); +} + +bool Error::classof(const Expression *e) { + return e->getType() == TypeID::KEYWORD; +}; + +} // namespace serene::ast diff --git a/serene/src/ast.h b/serene/src/ast.h new file mode 100644 index 0000000..a39d62e --- /dev/null +++ b/serene/src/ast.h @@ -0,0 +1,254 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef AST_H +#define AST_H + +#include "location.h" +#include "serene/config.h" + +#include + +#include + +namespace serene::ast { + +struct Expression; + +using Node = std::unique_ptr; +using MaybeNode = llvm::Expected; + +using Ast = std::vector; +using MaybeAst = llvm::Expected; + +constexpr static auto EmptyNode = nullptr; + +// ============================================================================ +// Expression +// The abstract class that all the AST nodes derive from. It provides the +// common interface for the expressions to implement. +// ============================================================================ +struct Expression { + + /// The location range provides information about where in the input + /// string the current expression is used. + LocationRange location; + + Expression(const LocationRange &loc) : location(loc){}; + virtual ~Expression() = default; + + /// Returns the type of the expression. We need this function to perform + /// dynamic casting of expression object to implementations such as lisp or + /// symbol. + virtual TypeID getType() const = 0; + + /// The string representation of an expression + virtual std::string toString() const = 0; + + /// Analyzes the semantics of current node and return a new node in case + /// that we need to semantically rewrite the current node and replace it with + /// another node. For example to change from a List containing `(def a b)` + /// to a `Def` node that represents defining a new binding. + /// + /// \param state is the analysis state object of the semantic analyzer. 
+ // virtual MaybeNode analyze(semantics::AnalysisState &state) = 0; + + /// Generates the corresponding SLIR of the expression and attaches it to the + /// given module. + /// + /// \param ns The namespace that the current expression is in. + /// \param m The target MLIR moduleOp to attach the operations to + // virtual void generateIR(serene::Namespace &ns, mlir::ModuleOp &m) = 0; +}; + +// ============================================================================ +// Symbol +// It represents a lisp symbol (don't mix it up with ELF symbols). +// ============================================================================ +struct Symbol : public Expression { + std::string name; + std::string nsName; + + Symbol(const LocationRange &loc, llvm::StringRef name, + llvm::StringRef currentNS); + Symbol(Symbol &s); + + TypeID getType() const override; + std::string toString() const override; + + ~Symbol() = default; + + static bool classof(const Expression *e); +}; + +// ============================================================================ +// Number +// ============================================================================ +struct Number : public Expression { + // TODO: [ast] Split the number type into their own types + std::variant value; + // /TODO + + bool isNeg; + bool isFloat; + + Number(const LocationRange &loc, const long &num); + Number(const LocationRange &loc, const unsigned long &num); + Number(const LocationRange &loc, const double &num); + Number(Number &n); + + TypeID getType() const override; + std::string toString() const override; + + ~Number() = default; + + static bool classof(const Expression *e); +}; + +// ============================================================================ +// List +// ============================================================================ +struct List : public Expression { + Ast elements; + + List(const LocationRange &loc, Ast &v); + List(const List &l) = delete; + List(List &&l) noexcept = default; + + 
TypeID getType() const override; + std::string toString() const override; + + ~List() = default; + + static bool classof(const Expression *e); +}; + +// ============================================================================ +// String +// ============================================================================ +struct String : public Expression { + std::string data; + + String(const LocationRange &loc, llvm::StringRef v); + String(String &s); + + TypeID getType() const override; + std::string toString() const override; + + ~String() = default; + + static bool classof(const Expression *e); +}; + +// ============================================================================ +// Keyword +// ============================================================================ +struct Keyword : public Expression { + std::string name; + + Keyword(const LocationRange &loc, llvm::StringRef name); + Keyword(Keyword &s); + + TypeID getType() const override; + std::string toString() const override; + + ~Keyword() = default; + + static bool classof(const Expression *e); +}; + +// ============================================================================ +// Error +// One way of representing errors is to just treat them as another type of node +// in the AST and the parser can generate them in case of any error or semantic +// analizer can do the same. 
At the time of processing the AST by the JIT +// or even anytime earlier we can just stop the execution and deal with the +// issue +// ============================================================================ +struct Error : public Expression { + std::string msg; + std::unique_ptr tag; + + Error(const LocationRange &loc, std::unique_ptr tag, + llvm::StringRef msg); + Error(Error &e); + + TypeID getType() const override; + std::string toString() const override; + + ~Error() = default; + + static bool classof(const Expression *e); +}; + +/// Creates a new `node` of type `T` and forwards any given parameter +/// to the constructor of type `T`. This is the **official way** to create +/// a new `Expression`. Here is an example: +/// \code +/// auto list = make(); +/// \endcode +/// +/// \param[args] Any argument with any type passed to this function will be +/// passed to the constructor of type T. +/// \return A unique pointer to an Expression +template +Node make(Args &&...args) { + return std::make_unique(std::forward(args)...); +}; +/// Creates a new `node` of type `T` and forwards any given parameter +/// to the constructor of type `T`. Unlike `make`, the result keeps type `T`. +/// Here is an example: +/// \code +/// auto list = makeAndCast(); +/// \endcode +/// +/// \param[args] Any argument with any type passed to this function will be +/// passed to the constructor of type T. +/// \return A shared pointer (built from `std::make_unique`) to a value of type T. +template +std::shared_ptr makeAndCast(Args &&...args) { + return std::make_unique(std::forward(args)...); +}; + +/// The helper function to create a new `Node` and return it. It should be used +/// wherever we want to return a `MaybeNode` successfully. +template +MaybeNode makeSuccessfulNode(Args &&...args) { + return make(std::forward(args)...); +}; + +/// The helper function that creates an Error (`llvm::Error`) by passing all +/// the given arguments to the constructor of the template param `E`. 
+template +llvm::Expected makeErrorful(Args &&...args) { + return llvm::make_error(std::forward(args)...); +}; + +/// The helper function that creates an Error (`llvm::Error`) by passing all +/// the given arguments to the constructor of the template param `E`. +template +MaybeNode makeErrorNode(Args &&...args) { + return makeErrorful(std::forward(args)...); +}; + +/// Converts the given AST to string and prints it out +void dump(Ast &); + +} // namespace serene::ast + +#endif diff --git a/serene/src/environment.h b/serene/src/environment.h new file mode 100644 index 0000000..0e3225b --- /dev/null +++ b/serene/src/environment.h @@ -0,0 +1,80 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef ENVIRONMENT_H +#define ENVIRONMENT_H + +#include "utils.h" + +#include +#include + +namespace serene { + +/// This class represents a classic lisp environment (or scope) that holds the +/// bindings from type `K` to type `V`. For example an environment of symbols +/// to expressions would be `Environment` +template +class Environment { + + Environment *parent; + + using StorageType = llvm::StringMap; + // The actual bindings storage + StorageType pairs; + +public: + Environment() : parent(nullptr) {} + explicit Environment(Environment *parent) : parent(parent){}; + + /// Look up the given `key` in this environment, then in its parents; yields std::nullopt when no truthy value is found. 
+ std::optional lookup(llvm::StringRef key) { + if (auto value = pairs.lookup(key)) { + return value; + } + + if (parent) { + return parent->lookup(key); + } + + return std::nullopt; + }; + + /// Insert the given `key` with the given `value` into the storage. This + /// operation will shadow an already existing `key` in the parent environment + mlir::LogicalResult insert_symbol(llvm::StringRef key, V value) { + auto result = pairs.insert_or_assign(key, value); + UNUSED(result); + return mlir::success(); + }; + + inline typename StorageType::iterator begin() { return pairs.begin(); } + + inline typename StorageType::iterator end() { return pairs.end(); } + + inline typename StorageType::const_iterator begin() const { + return pairs.begin(); + } + inline typename StorageType::const_iterator end() const { + return pairs.end(); + } +}; + +} // namespace serene + +#endif diff --git a/serene/src/jit/jit.h b/serene/src/jit/jit.h index c0358ab..2f998df 100644 --- a/serene/src/jit/jit.h +++ b/serene/src/jit/jit.h @@ -119,6 +119,15 @@ class JIT { llvm::Error createCurrentProcessJD(); + // Anonymous function counter. We need to assign a unique name to each + // anonymous function and we use this counter to generate those names + std::atomic fn_counter = 0; + + // Since indexing namespaces by the name would be inefficient, We use + // unsigned integer and assign a number to all the namespaces at the + // creation time. Namespace IDs have to be unique. 
+ std::atomic ns_counter = 0; + public: JIT(llvm::orc::JITTargetMachineBuilder &&jtmb, std::unique_ptr opts); static MaybeJIT make(llvm::orc::JITTargetMachineBuilder &&jtmb, @@ -151,6 +160,8 @@ public: void setLoadPaths(std::vector &dirs) { loadPaths.swap(dirs); }; /// Return the load paths for namespaces llvm::ArrayRef getLoadPaths() { return loadPaths; }; + + const Options &getOptions() const { return *options; }; }; MaybeJIT makeJIT(std::unique_ptr opts); diff --git a/serene/src/location.h b/serene/src/location.h new file mode 100644 index 0000000..2f596fb --- /dev/null +++ b/serene/src/location.h @@ -0,0 +1,88 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef LOCATION_H +#define LOCATION_H + +#include +#include + +#include + +namespace serene { + +/// It represents a location in the input string to the parser via `line`, +struct Location { + /// Since namespaces are our unit of compilation, we need to have + /// a namespace in hand + llvm::StringRef ns; + + std::optional filename = std::nullopt; + /// A pointer to the character that this location is pointing to + /// it the input buffer + const char *c = nullptr; + + /// At this stage we only support 65535 lines of code in each file + unsigned short int line = 0; + /// At this stage we only support 65535 chars in each line + unsigned short int col = 0; + + bool knownLocation = true; + + ::std::string toString() const; + + Location() = default; + explicit Location(llvm::StringRef ns, + std::optional fname = std::nullopt, + const char *c = nullptr, unsigned short int line = 0, + unsigned short int col = 0, bool knownLocation = true) + : ns(ns), filename(fname), c(c), line(line), col(col), + knownLocation(knownLocation){}; + + Location clone() const; + + // mlir::Location toMLIRLocation(mlir::MLIRContext &ctx); + + /// Returns an unknown location for the given \p ns. 
+ static Location UnknownLocation(llvm::StringRef ns) { + return Location(ns, std::nullopt, nullptr, 0, 0, false); + } +}; + +class LocationRange { +public: + Location start; + Location end; + + LocationRange() = default; + explicit LocationRange(Location _start) : start(_start), end(_start){}; + LocationRange(Location _start, Location _end) : start(_start), end(_end){}; + // LocationRange(const LocationRange &); + + bool isKnownLocation() const { return start.knownLocation; }; + + static LocationRange UnknownLocation(llvm::StringRef ns) { + return LocationRange(Location::UnknownLocation(ns)); + } +}; + +void incLocation(Location &, const char *); +void decLocation(Location &, const char *); + +} // namespace serene +#endif diff --git a/serene/src/namespace.cpp b/serene/src/namespace.cpp new file mode 100644 index 0000000..7445897 --- /dev/null +++ b/serene/src/namespace.cpp @@ -0,0 +1,223 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#include "namespace.h" + +#include "jit/jit.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +using namespace std; +using namespace llvm; + +namespace serene { + +Namespace::Namespace(jit::JIT &engine, llvm::StringRef ns_name, + std::optional filename) + : engine(engine), name(ns_name) { + if (filename.has_value()) { + this->filename.emplace(filename.value().str()); + } + + // Create the root environment + createEnv(nullptr); +}; + +SemanticEnv &Namespace::createEnv(SemanticEnv *parent) { + auto env = std::make_unique(parent); + environments.push_back(std::move(env)); + + return *environments.back(); +}; + +SemanticEnv &Namespace::getRootEnv() { + assert(!environments.empty() && "Root env is not created!"); + + return *environments.front(); +}; + +// mlir::LogicalResult Namespace::define(std::string &name, ast::Node &node) { +// auto &rootEnv = getRootEnv(); + +// if (failed(rootEnv.insert_symbol(name, node))) { +// return mlir::failure(); +// } + +// symbolList.push_back(name); +// return mlir::success(); +// } + +ast::Ast &Namespace::getTree() { return this->tree; } + +llvm::Error Namespace::ExpandTree(ast::Ast &ast) { + + // If the target phase is just parsing we don't want + // to run the semantic analyzer or anything beyond parser + if (engine.getOptions().compilationPhase == CompilationPhase::Parse) { + // we just want the raw AST + this->tree.insert(this->tree.end(), std::make_move_iterator(ast.begin()), + std::make_move_iterator(ast.end())); + ast.clear(); + return llvm::Error::success(); + } + + // just for now + this->tree.insert(this->tree.end(), std::make_move_iterator(ast.begin()), + std::make_move_iterator(ast.end())); + ast.clear(); + + // auto &rootEnv = getRootEnv(); + + // auto state = semantics::makeAnalysisState(*this, rootEnv); + // // Run the semantic analyer on the ast and then if everything + // // is ok add the form to the tree and forms + // auto maybeForm = 
semantics::analyze(*state, ast); + + // if (!maybeForm) { + // return maybeForm.takeError(); + // } + + // auto semanticAst = std::move(*maybeForm); + // this->tree.insert(this->tree.end(), semanticAst.begin(), + // semanticAst.end()); + + return llvm::Error::success(); +} + +// MaybeModuleOp Namespace::generate(unsigned offset) { +// // The reason why we return an optional value instead of Errors +// // is the way MLIR's diagnostic engine works. Passes may use +// // the `emit` function of operations to report errors to the +// // diagnostic engine. So we can't return any error diractly. + +// mlir::OpBuilder builder(&ctx.mlirContext); + +// // TODO: Fix the unknown location by pointing to the `ns` form +// auto module = mlir::ModuleOp::create(builder.getUnknownLoc(), +// std::optional(name)); + +// auto treeSize = getTree().size(); + +// // Walk the AST and call the `generateIR` function of each node. +// // Since nodes will have access to the a reference of the +// // namespace they can use the builder and keep adding more +// // operations to the module via the builder +// for (unsigned i = offset; i < treeSize; ++i) { +// auto &node = getTree()[i]; +// node->generateIR(*this, module); +// } + +// if (mlir::failed(mlir::verify(module))) { +// module.emitError("Can't verify the module"); +// module.erase(); +// return llvm::None; +// } + +// if (mlir::failed(runPasses(module))) { +// // TODO: Report a proper error +// module.emitError("Failure in passes!"); +// module.erase(); +// return llvm::None; +// } + +// return MaybeModuleOp(module); +// } + +// mlir::LogicalResult Namespace::runPasses(mlir::ModuleOp &m) { +// return ctx.pm.run(m); +// }; + +// void Namespace::dump() { +// llvm::outs() << "\nMLIR: \n"; +// auto maybeModuleOp = generate(); + +// if (!maybeModuleOp) { + +// llvm::errs() << "Failed to generate the IR.\n"; +// return; +// } + +// mlir::OpPrintingFlags flags; +// flags.enableDebugInfo(); + +// maybeModuleOp.getValue()->print(llvm::outs(), 
flags); +// }; + +// MaybeModule Namespace::compileToLLVM() { +// // The reason why we return an optional value instead of Errors +// // is the way MLIR's diagnostic engine works. Passes may use +// // the `emit` function of operations to report errors to the +// // diagnostic engine. So we can't return any error diractly. + +// auto maybeModule = generate(); + +// if (!maybeModule) { +// NAMESPACE_LOG("IR generation failed for '" << name << "'"); +// return llvm::None; +// } + +// if (ctx.getTargetPhase() >= CompilationPhase::IR) { +// mlir::ModuleOp module = maybeModule.getValue().get(); +// return ::serene::slir::compileToLLVMIR(ctx, module); +// } + +// return llvm::None; +// }; + +// MaybeModule Namespace::compileToLLVMFromOffset(unsigned offset) { +// // The reason why we return an optional value instead of Errors +// // is the way MLIR's diagnostic engine works. Passes may use +// // the `emit` function of operations to report errors to the +// // diagnostic engine. So we can't return any error diractly. 
+ +// auto maybeModule = generate(offset); + +// if (!maybeModule) { +// NAMESPACE_LOG("IR generation failed for '" << name << "'"); +// return llvm::None; +// } + +// if (ctx.getTargetPhase() >= CompilationPhase::IR) { +// mlir::ModuleOp module = maybeModule.getValue().get(); +// return ::serene::slir::compileToLLVMIR(ctx, module); +// } + +// return llvm::None; +// }; + +NSPtr Namespace::make(jit::JIT &engine, llvm::StringRef name, + std::optional filename) { + return std::make_unique(engine, name, filename); +}; + +Namespace::~Namespace() { + // TODO: Clean up anything related to this namespace in the context + // TODO: Remove anything related to this namespace in the JIT + NAMESPACE_LOG("Destructing NS: " << name); +}; + +} // namespace serene diff --git a/serene/src/namespace.h b/serene/src/namespace.h new file mode 100644 index 0000000..23411ec --- /dev/null +++ b/serene/src/namespace.h @@ -0,0 +1,140 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** + * Commentary: + * Rules of a namespace: + * - A namespace has have a name and it has to own it. + * - A namespace may or may not be associated with a file + * - The internal AST of a namespace is an evergrowing tree which may expand at + * any given time. 
For example via iteration of a REPL + * - `environments` vector is the owner of all the semantic envs + * - The first env in the `environments` is the root env. + * + * How to create a namespace? + * The official way to create a namespace object is to use the `SereneContext` + * object and call `readNamespace`, `importNamespace` or `makeNamespace`. + */ + +// TODO: Add a mechanism to figure out whether a namespace has changed or not +// either on memory or disk + +#ifndef NAMESPACE_H +#define NAMESPACE_H + +#include "ast.h" +#include "environment.h" +#include "utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#define NAMESPACE_LOG(...) \ + DEBUG_WITH_TYPE("NAMESPACE", llvm::dbgs() << __VA_ARGS__ << "\n"); + +namespace serene { +namespace jit { +class JIT; +} // namespace jit + +class Namespace; + +using NSPtr = std::unique_ptr; +using MaybeNS = llvm::Expected; +using SemanticEnv = Environment; +using SemanticEnvPtr = std::unique_ptr; +using SemanticEnvironments = std::vector; + +/// Serene's namespaces are the unit of compilation. Any code that needs to be +/// compiled has to be in a namespace. The official way to create a new +/// namespace is to use the `readNamespace`, `importNamespace` and +/// `makeNamespace` member functions of `SereneContext`. +class Namespace { + jit::JIT &engine; + /// The content of the namespace. It should always hold a semantically + /// correct AST. It means that the AST that we want to store here has + /// to pass the semantic analyzer checks. + ast::Ast tree; + + SemanticEnvironments environments; + + std::vector symbolList; + +public: + std::string name; + std::optional filename; + + /// Create a new namespace with the given `name` and optional `filename` and + /// return a unique pointer to it in the given Serene context. 
+ static NSPtr make(jit::JIT &engine, llvm::StringRef name, + std::optional filename); + + Namespace(jit::JIT &engine, llvm::StringRef ns_name, + std::optional filename); + + /// Create a new environment with the given \p parent as the parent, + /// push the environment to the internal environment storage and + /// return a reference to it. The namespace itself is the owner of + /// environments. + SemanticEnv &createEnv(SemanticEnv *parent); + + /// Return a reference to the top level (root) environment of ns. + SemanticEnv &getRootEnv(); + + /// Define a new binding in the root environment with the given \p name + /// and the given \p node. Defining a new binding with a name that + /// already exists is legal and will overwrite the previous binding and + /// the given name will point to a new value from now on. NOTE: its definition in namespace.cpp is currently commented out. + mlir::LogicalResult define(std::string &name, ast::Node &node); + + /// Add the given \p ast to the namespace and return any possible error. + /// The given \p ast will be added to a vector of ASTs by expanding + /// the tree vector to contain \p ast. + /// + /// The semantic analyzer step is currently commented out in namespace.cpp, so the nodes of \p ast are moved into the tree as they are. + llvm::Error ExpandTree(ast::Ast &ast); + + ast::Ast &getTree(); + + const std::vector &getSymList() { return symbolList; }; + + /// Dumps the namespace with respect to the compilation phase + // void dump(); + + ~Namespace(); +}; + +} // namespace serene + +#endif diff --git a/serene/src/options.h b/serene/src/options.h index 5e0cbde..5003401 100644 --- a/serene/src/options.h +++ b/serene/src/options.h @@ -65,7 +65,7 @@ struct Options { // appropriate code for the host. If the same function has to be part // of the runtime, then we use `targetTriple` again to generate the code // for the target platform. So, we might end up with two version of the - // same function + // same function. 
const llvm::Triple hostTriple; CompilationPhase compilationPhase = CompilationPhase::NoOptimization; diff --git a/serene/src/reader.cpp b/serene/src/reader.cpp new file mode 100644 index 0000000..8087b2b --- /dev/null +++ b/serene/src/reader.cpp @@ -0,0 +1,431 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "reader.h" + +// #include "serene/errors.h" +// #include "serene/exprs/expression.h" +// #include "serene/exprs/list.h" +// #include "serene/exprs/number.h" +// #include "serene/exprs/symbol.h" +// #include "serene/namespace.h" +// #include "serene/utils.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace serene { + +namespace reader { +// LocationRange::LocationRange(const LocationRange &loc) { +// start = loc.start.clone(); +// end = loc.end.clone(); +// } + +/// Return the string represenation of the location. +std::string Location::toString() const { + return llvm::formatv("{0}:{1}", line, col); +}; + +Location Location::clone() const { + return Location{ns, filename, c, line, col, knownLocation}; +} + +/// Increase the given location by one and set the line/col value in respect to +/// the `newline` in place. 
+/// \param loc The `Location` data
+/// \param c A pointer to the current char that the location has to point to
+void incLocation(Location &loc, const char *c) {
+  // TODO: Handle the end of line with respect to the OS.
+  // Increase the current position in the buffer with respect to the end
+  // of line: a newline moves to the next line and resets the column to 0,
+  // any other character moves one column forward.
+  auto newline = *c == '\n';
+
+  if (!newline) {
+    loc.col++;
+  } else {
+    loc.line++;
+    loc.col = 0;
+  }
+}
+
+/// Decrease the given location by one and set the line/col value in respect to
+/// the `newline` in place. Both `line` and `col` are clamped at 0.
+/// \param loc The `Location` data
+/// \param c A pointer to the current char that the location has to point to
+void decLocation(Location &loc, const char *c) {
+  // TODO: Handle the end of line with respect to the OS.
+  // Decrease the current position in the buffer with respect to the end
+  // of line.
+  auto newline = *c == '\n';
+
+  if (newline) {
+    loc.line = loc.line == 0 ? 0 : loc.line - 1;
+
+    // We don't move back the `col` value because we simply don't know it
+  } else {
+    loc.col = loc.col == 0 ? 0 : loc.col - 1;
+  }
+}
+
+/// Create a reader over \p buffer for the namespace \p ns and the optional
+/// \p filename. The cursor is placed one position before the first character
+/// of the buffer, so the first `advanceByOne` lands on the first character.
+// NOTE(review): `buf.begin() - 1` forms a pointer before the start of the
+// buffer, which is not a valid pointer value in C++; consider tracking a
+// "not started yet" state instead.
+// NOTE(review): the `<llvm::StringRef>` argument of `std::optional` was missing
+// from the extracted patch and has been restored; confirm against upstream.
+Reader::Reader(SereneContext &ctx, llvm::StringRef buffer, llvm::StringRef ns,
+               std::optional<llvm::StringRef> filename)
+    : ctx(ctx), ns(ns), filename(filename), buf(buffer),
+      currentLocation(Location(ns, filename)) {
+  UNUSED(this->ctx);
+  READER_LOG("Setting the first char of the buffer");
+  currentChar          = buf.begin() - 1;
+  currentPos           = 1;
+  currentLocation.line = 1;
+  currentLocation.col  = 1;
+};
+
+/// Same as the `llvm::StringRef` constructor, reading from the content of the
+/// given memory buffer.
+Reader::Reader(SereneContext &ctx, llvm::MemoryBufferRef buffer,
+               llvm::StringRef ns, std::optional<llvm::StringRef> filename)
+    : Reader(ctx, buffer.getBuffer(), ns, filename){};
+
+Reader::~Reader() { READER_LOG("Destroying the reader"); }
+
+/// Move the cursor one character forward and update `currentPos` and
+/// `currentLocation`.
+///
+/// The line number is bumped lazily: `readEOL` records that the previous
+/// character was a newline, and the line/column are reset when the character
+/// after that newline is consumed.
+void Reader::advanceByOne() {
+  currentChar++;
+  currentPos++;
+  currentLocation.col++;
+
+  if (*currentChar == '\n') {
+    READER_LOG("Detected end of line");
+
+    // Two newlines in a row: the previous one already ended a line.
+    if (readEOL) {
+      currentLocation.col = 1;
+      currentLocation.line++;
+    }
+
+    readEOL = true;
+  } else {
+    // First character after a newline starts a new line.
+    if (readEOL) {
+      currentLocation.line++;
+      currentLocation.col = 1;
+    }
+    readEOL = false;
+  }
+
+  READER_LOG("Moving to Char: " << *currentChar << " at location: "
+                                << currentLocation.toString());
+};
+/// Move the cursor forward.
+/// \param skipWhitespace When false, move exactly one character. When true,
+///        consume characters only while the *next* character is whitespace,
+///        so the cursor stops right before the first non-whitespace character
+///        (and does not move at all if the next character is not whitespace).
+void Reader::advance(bool skipWhitespace) {
+  if (skipWhitespace) {
+    for (;;) {
+      const auto *next = currentChar + 1;
+
+      // <cctype> classifiers need a value representable as `unsigned char`.
+      if (isspace(static_cast<unsigned char>(*next)) == 0) {
+        return;
+      }
+
+      advanceByOne();
+    }
+  } else {
+    advanceByOne();
+  }
+};
+
+/// Look ahead in the buffer without moving the cursor.
+/// \param skipWhitespace When true, return a pointer to the first
+///        non-whitespace character after the cursor; \p count is ignored.
+/// \param count How many characters to look ahead when \p skipWhitespace is
+///        false.
+const char *Reader::nextChar(bool skipWhitespace, unsigned count) {
+  if (!skipWhitespace) {
+    READER_LOG("Next char: " << *(currentChar + count));
+    return currentChar + count;
+  }
+
+  const auto *c = currentChar + 1;
+  while (isspace(static_cast<unsigned char>(*c)) != 0) {
+    c++;
+  };
+
+  READER_LOG("Next char: " << *c);
+  return c;
+};
+
+/// Return true when the position tracker has moved past the size of the
+/// buffer, or when \p c points at a NUL byte or at `EOF`.
+bool Reader::isEndOfBuffer(const char *c) {
+  // Check the position first: the buffer might not be null terminated, so
+  // `*c` must not be read once we are past the end.
+  return currentPos > buf.size() || *c == '\0' ||
+         (static_cast<int>(*c) == EOF);
+};
+
+/// Return a clone of the current location.
+Location Reader::getCurrentLocation() { return currentLocation.clone(); };
+
+/// A predicate function indicating whether the given char `c` is a valid
+/// char for the starting point of a symbol or not.
+bool Reader::isValidForIdentifier(char c) {
+  // Punctuation accepted in addition to letters and digits.
+  switch (c) {
+  case '!':
+  case '$':
+  case '%':
+  case '&':
+  case '*':
+  case '+':
+  case '-':
+  case '.':
+  case '~':
+  case '/':
+  case ':':
+  case '<':
+  case '=':
+  case '>':
+  case '?':
+  case '@':
+  case '^':
+  case '_':
+    return true;
+  }
+
+  // The <cctype> classifiers require a value representable as `unsigned char`
+  // (or EOF); passing a negative `char` is undefined behaviour.
+  return std::isalnum(static_cast<unsigned char>(c)) != 0;
+}
+
+/// Reads a number starting at the next character of the buffer.
+///
+/// A number is a run of digits containing at most one `.`. An error is
+/// returned when the first character is not a digit, when a second `.` is
+/// found, or when the digits are directly followed by an alphabetic character.
+/// \param neg whether to read a negative number or not. When true the
+///            collected literal is prefixed with `-`.
+exprs::MaybeNode Reader::readNumber(bool neg) {
+  READER_LOG("Reading a number...");
+  std::string number(neg ? "-" : "");
+  bool floatNum = false;
+
+  const auto *c = nextChar();
+  advance();
+
+  LocationRange loc(getCurrentLocation());
+
+  if (isdigit(static_cast<unsigned char>(*c)) == 0) {
+    return errors::makeError(ctx, errors::InvalidDigitForNumber, loc);
+  }
+
+  for (;;) {
+    number += *c;
+    c = nextChar(false);
+
+    if ((isdigit(static_cast<unsigned char>(*c)) != 0) || *c == '.') {
+      // Only a single decimal point is allowed.
+      if (*c == '.' && floatNum) {
+        loc = LocationRange(getCurrentLocation());
+        return errors::makeError(ctx, errors::TwoFloatPoints, loc);
+      }
+
+      if (*c == '.') {
+        floatNum = true;
+      }
+
+      advance();
+      continue;
+    }
+    break;
+  }
+
+  // A letter glued to the digits (e.g. `12ab`) makes the literal invalid.
+  if (std::isalpha(static_cast<unsigned char>(*c)) != 0) {
+    advance();
+    loc.start = getCurrentLocation();
+    return errors::makeError(ctx, errors::InvalidDigitForNumber, loc);
+  }
+
+  loc.end = getCurrentLocation();
+  // NOTE(review): the template argument of `exprs::make` was missing from the
+  // extracted patch; `exprs::Number` is presumed -- confirm against upstream.
+  return exprs::make<exprs::Number>(loc, number, neg, floatNum);
+};
+
+/// Reads a symbol.
If the symbol looks like a number +/// If reads it as number +exprs::MaybeNode Reader::readSymbol() { + READER_LOG("Reading a symbol..."); + LocationRange loc; + const auto *c = nextChar(); + + if (!this->isValidForIdentifier(*c) || isEndOfBuffer(c) || + (isspace(*c) != 0)) { + advance(); + loc = LocationRange(getCurrentLocation()); + std::string msg; + + if (*c == ')') { + msg = "An extra ')' is detected."; + } + + return errors::makeError(ctx, errors::InvalidCharacterForSymbol, loc, msg); + } + + if (*c == '-') { + const auto *next = nextChar(false, 2); + if (isdigit(*next) != 0) { + // Swallow the - + advance(); + return readNumber(true); + } + } + + if (isdigit(*c) != 0) { + return readNumber(false); + } + + std::string sym; + advance(); + + for (;;) { + sym += *c; + c = nextChar(); + + if (!isEndOfBuffer(c) && + ((((isspace(*c)) == 0) && this->isValidForIdentifier(*c)))) { + advance(); + continue; + } + break; + } + + // TODO: Make sure that the symbol has 0 or 1 '/'. + + // TODO: Make sure that `/` is not at the start or at the end of the symbol + + loc.end = getCurrentLocation(); + return exprs::makeSuccessfulNode(loc, sym, this->ns); +}; + +/// Reads a list recursively +exprs::MaybeNode Reader::readList() { + READER_LOG("Reading a list..."); + + const auto *c = nextChar(); + advance(); + + auto list = exprs::makeAndCast(getCurrentLocation()); + + // TODO: Replace the assert with an actual check. 
+ assert(*c == '('); + + bool list_terminated = false; + + do { + const auto *c = nextChar(true); + + if (isEndOfBuffer(c)) { + advance(true); + advance(); + list->location.end = getCurrentLocation(); + return errors::makeError(ctx, errors::EOFWhileScaningAList, + list->location); + } + + switch (*c) { + case ')': + advance(true); + advance(); + list_terminated = true; + list->location.end = getCurrentLocation(); + break; + + default: + advance(true); + auto expr = readExpr(); + if (!expr) { + return expr; + } + + list->append(*expr); + } + + } while (!list_terminated); + + return list; +}; + +/// Reads an expression by dispatching to the proper reader function. +exprs::MaybeNode Reader::readExpr() { + const auto *c = nextChar(true); + + READER_LOG("Read char at `readExpr`: " << *c); + + if (isEndOfBuffer(c)) { + return exprs::EmptyNode; + } + + switch (*c) { + case '(': { + advance(true); + return readList(); + } + + default: + advance(true); + return readSymbol(); + } +}; + +/// Reads all the expressions in the reader's buffer as an AST. +/// Each expression type (from the reader perspective) has a +/// reader function. 
+exprs::MaybeAst Reader::read() { + + for (size_t current_pos = 0; current_pos < buf.size();) { + const auto *c = nextChar(true); + + if (isEndOfBuffer(c)) { + break; + } + + advance(true); + + auto tmp = readExpr(); + + if (tmp) { + if (*tmp == nullptr) { + break; + } + + this->ast.push_back(std::move(*tmp)); + + } else { + return tmp.takeError(); + } + } + + return std::move(this->ast); +}; + +exprs::MaybeAst read(SereneContext &ctx, const llvm::StringRef input, + llvm::StringRef ns, + std::optional filename) { + reader::Reader r(ctx, input, ns, filename); + auto ast = r.read(); + return ast; +} + +exprs::MaybeAst read(SereneContext &ctx, const llvm::MemoryBufferRef input, + llvm::StringRef ns, + std::optional filename) { + reader::Reader r(ctx, input, ns, filename); + + auto ast = r.read(); + return ast; +} +} // namespace reader +} // namespace serene diff --git a/serene/src/reader.h b/serene/src/reader.h new file mode 100644 index 0000000..1d837f5 --- /dev/null +++ b/serene/src/reader.h @@ -0,0 +1,112 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +/** + * Commentary: + * `Reader` is the base parser class and accepts a buffer like object (usually + * `llvm::StringRef`) as the input and parses it to create an AST (look at the + * `serene::exprs::Expression` class). + * + * The parsing algorithm is quite simple and it is a LL(2). 
It means that, we + * start parsing the input from the very first character and parse the input + * one char at a time till we reach the end of the input. Please note that + * when we call the `advance` function to move forward in the buffer, we + * can't go back. In order to look ahead in the buffer without moving in the + * buffer we use the `nextChar` method. + * + * We have dedicated methods to read different forms like `list`, `symbol` + * `number` and etc. Each of them return a `MaybeNode` that in the success + * case contains the node and an `Error` on the failure case. + */ + +#ifndef READER_H +#define READER_H + +#include "ast.h" +#include "location.h" + +#include +#include + +namespace serene { +/// Base reader class which reads from a string directly. +class Reader { +private: + llvm::StringRef ns; + std::optional filename; + + const char *currentChar = nullptr; + + llvm::StringRef buf; + + /// The position tracker that we will use to determine the end of the + /// buffer since the buffer might not be null terminated + size_t currentPos = static_cast(-1); + + Location currentLocation; + + bool readEOL = false; + + /// Returns a clone of the current location + Location getCurrentLocation(); + /// Returns the next character from the stream. + /// @param skip_whitespace An indicator to whether skip white space like chars + /// or not + void advance(bool skipWhitespace = false); + void advanceByOne(); + + const char *nextChar(bool skipWhitespace = false, unsigned count = 1); + + /// Returns a boolean indicating whether the given input character is valid + /// for an identifier or not. 
+ static bool isValidForIdentifier(char c); + + // The property to store the ast tree + Ast ast; + + MaybeNode readSymbol(); + MaybeNode readNumber(bool); + MaybeNode readList(); + MaybeNode readExpr(); + + bool isEndOfBuffer(const char *); + +public: + Reader(llvm::StringRef buf, llvm::StringRef ns, + std::optional filename); + Reader(llvm::MemoryBufferRef buf, llvm::StringRef ns, + std::optional filename); + + // void setInput(const llvm::StringRef string); + + /// Parses the the input and creates a possible AST out of it or errors + /// otherwise. + MaybeAst read(); + + ~Reader(); +}; + +/// Parses the given `input` string and returns a `Result` +/// which may contains an AST or an `llvm::Error` +MaybeAst read(llvm::StringRef input, llvm::StringRef ns, + std::optional filename); +MaybeAst read(llvm::MemoryBufferRef input, llvm::StringRef ns, + std::optional filename); + +} // namespace serene +#endif diff --git a/serene/src/source_mgr.cpp b/serene/src/source_mgr.cpp new file mode 100644 index 0000000..5f5ac93 --- /dev/null +++ b/serene/src/source_mgr.cpp @@ -0,0 +1,225 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include "serene/source_mgr.h" + +#include "serene/namespace.h" +#include "serene/reader/location.h" +#include "serene/reader/reader.h" +#include "serene/utils.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace serene { + +std::string SourceMgr::convertNamespaceToPath(std::string ns_name) { + std::replace(ns_name.begin(), ns_name.end(), '.', '/'); + + llvm::SmallString path; + path.append(ns_name); + llvm::sys::path::native(path); + + return std::string(path); +}; + +bool SourceMgr::isValidBufferID(unsigned i) const { + return i != 0 && i <= buffers.size(); +}; + +SourceMgr::MemBufPtr SourceMgr::findFileInLoadPath(const std::string &name, + std::string &importedFile) { + + auto path = convertNamespaceToPath(name); + + // If the file didn't exist directly, see if it's in an include path. + for (unsigned i = 0, e = loadPaths.size(); i != e; ++i) { + + // TODO: Ugh, Udgly, fix this using llvm::sys::path functions + importedFile = loadPaths[i] + llvm::sys::path::get_separator().data() + + path + "." 
+ DEFAULT_SUFFIX; + + SMGR_LOG("Try to load the ns from: " + importedFile); + auto newBufOrErr = llvm::MemoryBuffer::getFile(importedFile); + + if (auto err = newBufOrErr.getError()) { + llvm::consumeError(llvm::errorCodeToError(err)); + continue; + } + + return std::move(*newBufOrErr); + } + + return nullptr; +}; + +MaybeNS SourceMgr::readNamespace(SereneContext &ctx, std::string name, + reader::LocationRange importLoc) { + std::string importedFile; + + SMGR_LOG("Attempt to load namespace: " + name); + MemBufPtr newBufOrErr(findFileInLoadPath(name, importedFile)); + + if (newBufOrErr == nullptr) { + auto msg = llvm::formatv("Couldn't find namespace '{0}'", name).str(); + return errors::makeError(ctx, errors::NSLoadError, importLoc, msg); + } + + auto bufferId = AddNewSourceBuffer(std::move(newBufOrErr), importLoc); + + UNUSED(nsTable.insert_or_assign(name, bufferId)); + + if (bufferId == 0) { + auto msg = llvm::formatv("Couldn't add namespace '{0}'", name).str(); + return errors::makeError(ctx, errors::NSAddToSMError, importLoc, msg); + } + + // Since we moved the buffer to be added as the source storage we + // need to get a pointer to it again + const auto *buf = getMemoryBuffer(bufferId); + + // Read the content of the buffer by passing it the reader + auto maybeAst = reader::read(ctx, buf->getBuffer(), name, + std::optional(llvm::StringRef(importedFile))); + + if (!maybeAst) { + SMGR_LOG("Couldn't Read namespace: " + name); + return maybeAst.takeError(); + } + + // Create the NS and set the AST + auto ns = + ctx.makeNamespace(name, std::optional(llvm::StringRef(importedFile))); + + if (auto errs = ns->addTree(*maybeAst)) { + SMGR_LOG("Couldn't set the AST for namespace: " + name); + return errs; + } + + return ns; +}; + +unsigned SourceMgr::AddNewSourceBuffer(std::unique_ptr f, + reader::LocationRange includeLoc) { + SrcBuffer nb; + nb.buffer = std::move(f); + nb.importLoc = includeLoc; + buffers.push_back(std::move(nb)); + return buffers.size(); +}; + 
+// NOTE(review): the `<...>` template parameters/arguments in this block were
+// missing from the extracted patch and have been restored to match LLVM's
+// `SourceMgr.cpp`, which this code is borrowed from; confirm against upstream.
+
+/// Return the line-ending offset table stored behind \p offsetCache, building
+/// it on first use.
+///
+/// The table holds the offset of every `\n` in \p buffer, in ascending order.
+/// It is heap allocated and published through \p offsetCache; this function
+/// never frees it.
+/// \tparam T The unsigned integer type used to store the offsets. It has to be
+///           wide enough to hold the size of \p buffer.
+template <typename T>
+static std::vector<T> &GetOrCreateOffsetCache(void *&offsetCache,
+                                              llvm::MemoryBuffer *buffer) {
+  if (offsetCache) {
+    return *static_cast<std::vector<T> *>(offsetCache);
+  }
+
+  // Lazily fill in the offset cache.
+  auto *offsets = new std::vector<T>();
+  size_t sz     = buffer->getBufferSize();
+
+  // TODO: Replace this assert with a realtime check
+  assert(sz <= std::numeric_limits<T>::max());
+
+  llvm::StringRef s = buffer->getBuffer();
+  for (size_t n = 0; n < sz; ++n) {
+    if (s[n] == '\n') {
+      offsets->push_back(static_cast<T>(n));
+    }
+  }
+
+  offsetCache = offsets;
+  return *offsets;
+}
+
+/// Return a pointer to the first character of line \p lineNo (counted from
+/// 1; 0 is treated like 1), or null if the buffer has fewer lines.
+/// \tparam T The offset type of the cache, see `GetOrCreateOffsetCache`.
+template <typename T>
+const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
+    unsigned lineNo) const {
+  std::vector<T> &offsets =
+      GetOrCreateOffsetCache<T>(offsetCache, buffer.get());
+
+  // We start counting line and column numbers from 1.
+  if (lineNo != 0) {
+    --lineNo;
+  }
+
+  const char *bufStart = buffer->getBufferStart();
+
+  // The offset cache contains the location of the \n for the specified line,
+  // we want the start of the line. As such, we look for the previous entry.
+  if (lineNo == 0) {
+    return bufStart;
+  }
+
+  if (lineNo > offsets.size()) {
+    return nullptr;
+  }
+  // One past the `\n` that terminates the previous line.
+  return bufStart + offsets[lineNo - 1] + 1;
+}
+
+/// Return a pointer to the first character of the specified line number or
+/// null if the line number is invalid.
+const char * +SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned lineNo) const { + size_t sz = buffer->getBufferSize(); + if (sz <= std::numeric_limits::max()) { + return getPointerForLineNumberSpecialized(lineNo); + } + + if (sz <= std::numeric_limits::max()) { + return getPointerForLineNumberSpecialized(lineNo); + } + + if (sz <= std::numeric_limits::max()) { + return getPointerForLineNumberSpecialized(lineNo); + } + + return getPointerForLineNumberSpecialized(lineNo); +} + +SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&other) noexcept + : buffer(std::move(other.buffer)), offsetCache(other.offsetCache), + importLoc(other.importLoc) { + other.offsetCache = nullptr; +} + +SourceMgr::SrcBuffer::~SrcBuffer() { + if (offsetCache != nullptr) { + size_t sz = buffer->getBufferSize(); + if (sz <= std::numeric_limits::max()) { + delete static_cast *>(offsetCache); + } else if (sz <= std::numeric_limits::max()) { + delete static_cast *>(offsetCache); + } else if (sz <= std::numeric_limits::max()) { + delete static_cast *>(offsetCache); + } else { + delete static_cast *>(offsetCache); + } + offsetCache = nullptr; + } +} + +}; // namespace serene diff --git a/serene/src/source_mgr.h b/serene/src/source_mgr.h new file mode 100644 index 0000000..d5a681a --- /dev/null +++ b/serene/src/source_mgr.h @@ -0,0 +1,190 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef SERENE_SOURCE_MGR_H +#define SERENE_SOURCE_MGR_H + +#include "location.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SMGR_LOG(...) \ + DEBUG_WITH_TYPE("sourcemgr", llvm::dbgs() \ + << "[SMGR]: " << __VA_ARGS__ << "\n"); + +namespace serene { +class SereneContext; + +/// This class is quite similar to the `llvm::SourceMgr` in functionality. We +/// even borrowed some of the code from the original implementation but removed +/// a lot of code that were irrelevant to us. +/// +/// SouceMgr is responsible for finding a namespace in the `loadPaths` and read +/// the content of the `.srn` (or any of the `DEFAULT_SUFFIX`) into a +/// `llvm::MemoryBuffer` embedded in a `SrcBuffer` object as the owner of the +/// source files and then it will call the `reader` on the buffer to parse it +/// and create the actual `Namespace` object from the parsed AST. +/// +/// Later on, whenever we need to refer to the source file of a namespace for +/// diagnosis purposes or any other purpose we can use the functions in this +/// class to get hold of a pointer to a specific `Location` of the +/// buffer. +/// +/// Note: Unlike the original version, SourceMgr does not handle the diagnostics +/// and it uses the Serene's `DiagnosticEngine` for that matter. +class SourceMgr { + +public: + // TODO: Make it a vector of supported suffixes + constexpr static const char *DEFAULT_SUFFIX = "srn"; + +private: + struct SrcBuffer { + /// The memory buffer for the file. + std::unique_ptr buffer; + + /// Vector of offsets into Buffer at which there are line-endings + /// (lazily populated). Once populated, the '\n' that marks the end of + /// line number N from [1..] is at Buffer[OffsetCache[N-1]]. Since + /// these offsets are in sorted (ascending) order, they can be + /// binary-searched for the first one after any given offset (eg. an + /// offset corresponding to a particular SMLoc). 
+ /// + /// Since we're storing offsets into relatively small files (often smaller + /// than 2^8 or 2^16 bytes), we select the offset vector element type + /// dynamically based on the size of Buffer. + mutable void *offsetCache = nullptr; + + /// Look up a given \p ptr in in the buffer, determining which line it came + /// from. + unsigned getLineNumber(const char *ptr) const; + template + unsigned getLineNumberSpecialized(const char *ptr) const; + + /// Return a pointer to the first character of the specified line number or + /// null if the line number is invalid. + const char *getPointerForLineNumber(unsigned lineNo) const; + + template + const char *getPointerForLineNumberSpecialized(unsigned lineNo) const; + + /// This is the location of the parent import or unknown location if it is + /// the main namespace + LocationRange importLoc; + + SrcBuffer() = default; + SrcBuffer(SrcBuffer &&) noexcept; + SrcBuffer(const SrcBuffer &) = delete; + SrcBuffer &operator=(const SrcBuffer &) = delete; + ~SrcBuffer(); + }; + using MemBufPtr = std::unique_ptr; + + /// This is all of the buffers that we are reading from. + std::vector buffers; + + /// A hashtable that works as an index from namespace names to the buffer + /// position it the `buffer` + llvm::StringMap nsTable; + + // This is the list of directories we should search for include files in. + std::vector loadPaths; + + // Find a namespace file with the given \p name in the load path and \r retuns + // a unique pointer to the memory buffer containing the content or an error. + // In the success case it will put the path of the file into the \p + // importedFile. 
+ MemBufPtr findFileInLoadPath(const std::string &name, + std::string &importedFile); + + bool isValidBufferID(unsigned i) const; + + /// Converts the ns name to a partial path by replacing the dots with slashes + static std::string convertNamespaceToPath(std::string ns_name); + +public: + SourceMgr() = default; + SourceMgr(const SourceMgr &) = delete; + SourceMgr &operator=(const SourceMgr &) = delete; + SourceMgr(SourceMgr &&) = default; + SourceMgr &operator=(SourceMgr &&) = default; + ~SourceMgr() = default; + + /// Set the `loadPaths` to the given \p dirs. `loadPaths` is a vector of + /// directories that Serene will look in order to find a file that constains a + /// namespace which it is looking for. + void setLoadPaths(std::vector &dirs) { loadPaths.swap(dirs); } + + /// Return a reference to a `SrcBuffer` with the given ID \p i. + const SrcBuffer &getBufferInfo(unsigned i) const { + assert(isValidBufferID(i)); + return buffers[i - 1]; + } + + /// Return a reference to a `SrcBuffer` with the given namspace name \p ns. + const SrcBuffer &getBufferInfo(llvm::StringRef ns) const { + auto bufferId = nsTable.lookup(ns); + + if (bufferId == 0) { + // No such namespace + llvm_unreachable("couldn't find the src buffer for a namespace. It " + "should never happen."); + } + + return buffers[bufferId - 1]; + } + + /// Return a pointer to the internal `llvm::MemoryBuffer` of the `SrcBuffer` + /// with the given ID \p i. + const llvm::MemoryBuffer *getMemoryBuffer(unsigned i) const { + assert(isValidBufferID(i)); + return buffers[i - 1].buffer.get(); + } + + unsigned getNumBuffers() const { return buffers.size(); } + + /// Add a new source buffer to this source manager. This takes ownership of + /// the memory buffer. + unsigned AddNewSourceBuffer(std::unique_ptr f, + LocationRange includeLoc); + + /// Lookup for a file containing the namespace definition of with given + /// namespace name \p name. In case that the file exists, it returns an + /// `ErrorTree`. 
It will use the parser to read the file and create an AST + /// from it. Then create a namespace, set the its AST to the AST that we just + /// read from the file and return a shared pointer to the namespace. + /// + /// \p importLoc is a location in the source code where the give namespace is + /// imported. + MaybeNS readNamespace(SereneContext &ctx, std::string name, + LocationRange importLoc); +}; + +}; // namespace serene + +#endif diff --git a/serene/src/types.h b/serene/src/types.h new file mode 100644 index 0000000..1d065eb --- /dev/null +++ b/serene/src/types.h @@ -0,0 +1,86 @@ +/* -*- C -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef TYPES_H +#define TYPES_H + +#include "serene/config.h" + +#include + +typedef struct { + const TypeID id; + const char *name; + +} Type; + +typedef struct { + const Type type; + const void *data; +} Object; + +static const Type type = {.id = TYPE, .name = "type"}; +static const Type nil_type = {.id = NIL, .name = "nil"}; +static const Type function_type = {.id = FN, .name = "function"}; +static const Type protocol_type = {.id = PROTOCOL, .name = "protocol"}; +static const Type int_type = {.id = INT, .name = "int"}; +static const Type list_type = {.id = LIST, .name = "list"}; + +typedef struct { + const Type type; + const Type **args; + const Type *returnType; +} FunctionType; + +typedef struct { + const Type type; + const char *name; + const FunctionType **functions; +} ProtocolType; + +typedef struct { + const Type type; + const Type first; + const Type second; +} PairType; + +typedef struct { + const PairType type; + void *first; + void *second; +} Pair; + +typedef struct { + const Pair *head; + const unsigned int len; +} List; + +typedef struct { + const char *name; +} Symbol; + +typedef struct { + const char *data; + const unsigned int len; +} String; + +typedef struct { + const long data; +} Number; + +#endif diff --git a/serene/src/utils.h b/serene/src/utils.h new file mode 100644 index 0000000..0b849ce --- /dev/null +++ b/serene/src/utils.h @@ -0,0 +1,138 @@ +/* -*- C++ -*- + * Serene Programming Language + * + * Copyright (c) 2019-2023 Sameer Rahmani + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef UTILS_H +#define UTILS_H + +#include + +#include + +// Sometimes we need this to make both analyzer happy +// and the fn signature right. +#define UNUSED(x) (void)(x) + +// We use this value with llvm::SmallString +#define MAX_PATH_SLOTS 256 +// C++17 required. We can't go back to 14 any more :)) + +namespace serene { + +/// A similar type to Rust's Result data structure. It either holds a value of +/// type `T` successfully or holds a value of type `E` errorfully. It is +/// designed to be used in situations which the return value of a function might +/// contains some errors. The official way to use this type is to use the +/// factory functions `Success` and `Error`. For example: +/// +/// \code +/// auto successfulResult = Result::success(3); +/// auto notOkResult = Result::error(SomeLLVMError()); +// \endcode +/// +/// In order check for a value being errorful or successful checkout the `ok` +/// method or simply use the value as a conditiona. +/// +/// This class is setup in a way tha you can us a value of type `T` in places +/// that the compiler expects a `Result`. So for example: +/// +/// \code +/// Result fn() {return 2;} +/// \endcode +/// +/// works perfectly. +template +class Result { + + // The actual data container + std::variant contents; + + /// The main constructor which we made private to avoid ambiguousness in + /// input type. `Success` and `Error` call this ctor. + template + Result(InPlace i, Content &&c) : contents(i, std::forward(c)){}; + +public: + explicit constexpr Result(const T &v) + : Result(std::in_place_index_t<0>(), std::move(v)){}; + + /// Return a pointer to the success case value of the result. It is + /// important to check for the success case before calling this function. 
+ constexpr const T *getPointer() const { return &getValue(); } + + /// Return a pointer to the success case value of the result. It is + /// important to check for the success case before calling this function. + T *getPointer() { return &getValue(); } + + /// Return a pointer to the success case value of the result. It is + /// important to check for the success case before calling this function. + T *operator->() { return getPointer(); } + + /// Return a pointer to the success case value of the result. It is + /// important to check for the success case before calling this function. + constexpr const T *operator->() const { return getPointer(); } + + /// Dereference the success case and returns the value. It is + /// important to check for the success case before calling this function. + constexpr const T &operator*() const & { return getValue(); } + + /// Dereference the success case and returns the value. It is + /// important to check for the success case before calling this function. + T &operator*() & { return getValue(); } + + /// Create a succesfull result with the given value of type `T`. + static Result success(T v) { + return Result(std::in_place_index_t<0>(), std::move(v)); + } + + /// Create an errorful result with the given value of type `E` (default + /// `llvm::Error`). 
+ static Result error(E e) { + return Result(std::in_place_index_t<1>(), std::move(e)); + } + + /// Return the value if it's successful otherwise throw an error + T &&getValue() && { return std::move(std::get<0>(contents)); }; + + /// Return the error value if it's errorful otherwise throw an error + E &&getError() && { return std::move(std::get<1>(contents)); }; + + // using std::get, it'll throw if contents doesn't contain what you ask for + + /// Return the value if it's successful otherwise throw an error + T &getValue() & { return std::get<0>(contents); }; + + /// Return the error value if it's errorful otherwise throw an error + E &getError() & { return std::get<1>(contents); }; + + const T &getValue() const & { return std::get<0>(contents); } + const E &getError() const & { return std::get<1>(contents); } + + /// Return the a boolean value indicating whether the value is succesful + /// or errorful. + bool ok() const { return std::holds_alternative(contents); }; + + operator bool() const { return ok(); } +}; + +inline void makeFQSymbolName(const llvm::StringRef &ns, + const llvm::StringRef &sym, std::string &result) { + result = (ns + "/" + sym).str(); +}; + +} // namespace serene +#endif