Move over namespace, ast, and the reader
ci/woodpecker/push/build Pipeline was successful Details
ci/woodpecker/push/docs Pipeline was successful Details

This commit is contained in:
Sameer Rahmani 2023-08-08 22:32:29 +01:00
parent 47c52d0488
commit b1cca14433
Signed by: lxsameer
GPG Key ID: B0A4AF28AB9FD90B
17 changed files with 2179 additions and 1 deletions

View File

@ -51,6 +51,8 @@ option(LLVM_USE_PERF "If the target LLVM build is built with LLVM_USE_PERF" OFF)
# Only do these if this is the main project, and not if it is included through add_subdirectory
if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
## Settings =======================
set(C_STANDARD 17)
# specify the C++ standard
if (CPP_20_SUPPORT)
set(CMAKE_CXX_STANDARD 20)

View File

@ -24,4 +24,31 @@
// Should we build the support for MLIR CL OPTIONS?
#cmakedefine SERENE_WITH_MLIR_CL_OPTION
// Type tags used to identify the kind of a Serene value/expression.
// The definition is shared between C and C++ translation units: C++ sees a
// scoped `enum class TypeID`, while C sees an anonymous enum typedef'd to
// `TypeID`.
// NOTE(review): in C the enumerators are unscoped (e.g. `NIL`, `Error`) and
// therefore land in the global namespace -- confirm this cannot clash.
#ifdef __cplusplus
enum class TypeID {
#else
typedef enum {
#endif
// NIL is pinned to 0; every following enumerator takes the next value.
NIL = 0,
SYMBOL,
TYPE,
FN,
NUMBER,
INT,
CSTRING,
STRING,
KEYWORD,
NAMESPACE,
LIST,
MAP,
VECTOR,
STRUCT,
PROTOCOL,
Error,
}
// Only the C flavour needs the typedef name after the closing brace.
#ifndef __cplusplus
TypeID
#endif
;
#endif

View File

@ -19,4 +19,8 @@ target_sources(serene PRIVATE
commands/commands.cpp
jit/jit.cpp
ast.cpp
namespace.cpp
)

167
serene/src/ast.cpp Normal file
View File

@ -0,0 +1,167 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "ast.h"
#include <llvm/Support/FormatVariadic.h>
namespace serene::ast {
// ============================================================================
// Symbol
// ============================================================================
/// Build a symbol from a (possibly namespace-qualified) name.
///
/// A name of the form `ns/sym` is split at the first `/`: the text before it
/// becomes `nsName` and everything after it becomes `name`. A name without a
/// `/` is stored unchanged and \p currentNS is used as its namespace.
///
/// \param loc       Source range of the symbol.
/// \param name      Symbol text. It is expected to be a valid, already
///                  validated string; no validation happens here.
/// \param currentNS Namespace used for unqualified names (same expectation).
Symbol::Symbol(const LocationRange &loc, llvm::StringRef name,
               llvm::StringRef currentNS)
    : Expression(loc) {
  const auto slashPos = name.find('/');

  if (slashPos != std::string::npos) {
    // Qualified symbol: "<namespace>/<name>".
    nsName     = name.substr(0, slashPos).str();
    this->name = name.substr(slashPos + 1, name.size()).str();
    return;
  }

  // Unqualified symbol: it lives in the current namespace.
  nsName     = currentNS.str();
  this->name = name.str();
}
/// Construct a symbol carrying the same location, name and namespace name as
/// \p s.
Symbol::Symbol(Symbol &s)
    : Expression(s.location), name(s.name), nsName(s.nsName) {}

/// Type tag of every symbol node.
TypeID Symbol::getType() const { return TypeID::SYMBOL; }

/// Debug representation: `<Symbol ns/name>`.
std::string Symbol::toString() const {
  return llvm::formatv("<Symbol {0}/{1}>", nsName, name).str();
}

/// LLVM-style RTTI hook: true when \p e reports the SYMBOL type tag.
bool Symbol::classof(const Expression *e) {
  return TypeID::SYMBOL == e->getType();
}
// ============================================================================
// Number
// ============================================================================
/// Construct an integer number node. `isNeg` records the sign of \p num and
/// `isFloat` is cleared.
Number::Number(const LocationRange &loc, const long &num)
    : Expression(loc), value(num), isNeg(num < 0), isFloat(false) {}

/// Construct a floating point number node. `isNeg` records the sign of \p num
/// and `isFloat` is set.
Number::Number(const LocationRange &loc, const double &num)
    : Expression(loc), value(num), isNeg(num < 0), isFloat(true) {}

// NOTE(review): ast.h also declares a `const unsigned long &` overload, but no
// definition for it is visible in this file -- confirm it is defined somewhere
// or drop the declaration.

/// Construct a number from \p n. All of the state is copied: previously only
/// `value` was copied, which left `isNeg` and `isFloat` uninitialized and
/// made `toString()` on the new object read an indeterminate flag.
Number::Number(Number &n)
    : Expression(n.location), value(n.value), isNeg(n.isNeg),
      isFloat(n.isFloat) {}
/// Type tag of every number node.
TypeID Number::getType() const { return TypeID::NUMBER; }

/// Debug representation: `<Number N>`.
///
/// The format string used to be "<Number {0}{1}>" while only one argument was
/// supplied, so the output carried a stray `{1}` placeholder (and newer LLVM
/// builds may assert on the argument-count mismatch). The numeric value
/// already includes its sign, so a single placeholder is all that is needed.
/// The active alternative of `value` is queried directly so that the result
/// never depends on `isFloat` being in sync with the variant.
std::string Number::toString() const {
  if (std::holds_alternative<double>(value)) {
    return llvm::formatv("<Number {0}>", std::get<double>(value));
  }

  return llvm::formatv("<Number {0}>", std::get<long>(value));
}

/// LLVM-style RTTI hook: true when \p e reports the NUMBER type tag.
bool Number::classof(const Expression *e) {
  return e->getType() == TypeID::NUMBER;
}
// ============================================================================
// List
// ============================================================================
List::List(const LocationRange &loc, Ast &v) : Expression(loc) {
this->elements.swap(v);
v.clear();
};
TypeID List::getType() const { return TypeID::LIST; };
std::string List::toString() const {
std::string s{this->elements.empty() ? "-" : ""};
for (const auto &n : this->elements) {
s = llvm::formatv("{0}, {1}", s, n->toString());
}
return llvm::formatv("<List {0}>", s);
}
bool List::classof(const Expression *e) {
return e->getType() == TypeID::LIST;
};
// ============================================================================
// String
// ============================================================================
/// Build a string node that stores its own copy of \p v.
String::String(const LocationRange &loc, llvm::StringRef v)
    : Expression(loc), data(v.str()) {}

/// Construct a string node with the same location and data as \p s.
String::String(String &s) : Expression(s.location), data(s.data) {}

/// Type tag of every string node.
TypeID String::getType() const { return TypeID::STRING; }

/// Debug representation: `<String '...'>` showing at most the first 10
/// characters of the data.
std::string String::toString() const {
  const std::string::size_type previewLength = 10;
  // `substr` clamps the count to the available characters on its own.
  const std::string preview = data.substr(0, previewLength);
  return llvm::formatv("<String '{0}'>", preview).str();
}

/// LLVM-style RTTI hook: true when \p e reports the STRING type tag.
bool String::classof(const Expression *e) {
  return TypeID::STRING == e->getType();
}
// ============================================================================
// Keyword
// ============================================================================
/// Build a keyword node that stores its own copy of \p name.
Keyword::Keyword(const LocationRange &loc, llvm::StringRef name)
    : Expression(loc), name(name.str()) {}

/// Construct a keyword node with the same location and name as \p s.
Keyword::Keyword(Keyword &s) : Expression(s.location), name(s.name) {}

/// Type tag of every keyword node.
TypeID Keyword::getType() const { return TypeID::KEYWORD; }

/// Debug representation: `<Keyword name>`.
std::string Keyword::toString() const {
  return llvm::formatv("<Keyword {0}>", name).str();
}

/// LLVM-style RTTI hook: true when \p e reports the KEYWORD type tag.
bool Keyword::classof(const Expression *e) {
  return TypeID::KEYWORD == e->getType();
}
// ============================================================================
// Error
// ============================================================================
Error::Error(const LocationRange &loc, std::unique_ptr<Keyword> tag,
llvm::StringRef msg)
: Expression(loc), msg(msg.str()), tag(std::move(tag)){};
Error::Error(Error &e) : Expression(e.location) {
this->msg = e.msg;
this->tag = std::move(e.tag);
};
TypeID Error::getType() const { return TypeID::KEYWORD; };
std::string Error::toString() const {
return llvm::formatv("<Error {0}>", msg);
}
bool Error::classof(const Expression *e) {
return e->getType() == TypeID::KEYWORD;
};
} // namespace serene::ast

254
serene/src/ast.h Normal file
View File

@ -0,0 +1,254 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef AST_H
#define AST_H
#include "location.h"
#include "serene/config.h"
#include <llvm/Support/Error.h>
#include <memory>
namespace serene::ast {
// Forward declaration so the aliases below can refer to the node base type.
struct Expression;
/// Owning handle to a single AST node.
using Node = std::unique_ptr<Expression>;
/// Either a node or an `llvm::Error`.
using MaybeNode = llvm::Expected<Node>;
/// A sequence of owned AST nodes.
using Ast = std::vector<Node>;
/// Either an AST or an `llvm::Error`.
using MaybeAst = llvm::Expected<Ast>;
/// Null value usable wherever an "empty" node is needed.
constexpr static auto EmptyNode = nullptr;
// ============================================================================
// Expression
// The abstract class that all the AST nodes derived from. It provides the
// common interface for the expressions to implement.
// ============================================================================
struct Expression {
/// The location range provides information about where in the input
/// string the current expression is used.
LocationRange location;
/// Store a copy of \p loc as the location of this expression.
Expression(const LocationRange &loc) : location(loc){};
virtual ~Expression() = default;
/// Returns the type of the expression. We need this function to perform
/// dynamic casting of expression objects to implementations such as `List`
/// or `Symbol` (see the `classof` member of each node type).
virtual TypeID getType() const = 0;
/// The string representation of an expression.
virtual std::string toString() const = 0;
/// Analyzes the semantics of current node and return a new node in case
/// that we need to semantically rewrite the current node and replace it with
/// another node. For example to change from a List containing `(def a b)`
/// to a `Def` node that represents defining a new binding.
///
/// \param state is the analysis state object of the semantic analyzer.
// virtual MaybeNode analyze(semantics::AnalysisState &state) = 0;
/// Generates the corresponding SLIR of the expression and attaches it to the
/// given module.
///
/// \param ns The namespace that the current expression is in.
/// \param m The target MLIR moduleOp to attach the operations to
// virtual void generateIR(serene::Namespace &ns, mlir::ModuleOp &m) = 0;
};
// ============================================================================
// Symbol
// It represent a lisp symbol (don't mix it up with ELF symbols).
// ============================================================================
struct Symbol : public Expression {
/// The symbol's own name, i.e. the part after the first `/` of a qualified
/// name, or the whole name when it is not qualified.
std::string name;
/// Name of the namespace the symbol belongs to.
std::string nsName;
/// \param loc       Source range of the symbol.
/// \param name      Symbol text, optionally qualified as `ns/name`.
/// \param currentNS Namespace name used when \p name is not qualified.
Symbol(const LocationRange &loc, llvm::StringRef name,
llvm::StringRef currentNS);
/// Construct a symbol from another symbol.
Symbol(Symbol &s);
TypeID getType() const override;
std::string toString() const override;
~Symbol() = default;
/// LLVM-style RTTI hook based on `Expression::getType()`.
static bool classof(const Expression *e);
};
// ============================================================================
// Number
// ============================================================================
struct Number : public Expression {
// TODO: [ast] Split the number type into their own types
/// The numeric payload: either an integer (`long`) or a `double`.
std::variant<long, double> value;
// /TODO
/// Set by the constructors when the given number is below zero.
bool isNeg;
/// Set by the `double` constructor, cleared by the `long` constructor.
bool isFloat;
Number(const LocationRange &loc, const long &num);
// NOTE(review): no definition of the `unsigned long` overload is visible in
// ast.cpp -- confirm it is defined somewhere before relying on it.
Number(const LocationRange &loc, const unsigned long &num);
Number(const LocationRange &loc, const double &num);
/// Construct a number from another number.
Number(Number &n);
TypeID getType() const override;
std::string toString() const override;
~Number() = default;
/// LLVM-style RTTI hook based on `Expression::getType()`.
static bool classof(const Expression *e);
};
// ============================================================================
// List
// ============================================================================
struct List : public Expression {
/// The nodes owned by this list, in order.
Ast elements;
/// Takes over the nodes stored in \p v; \p v is empty afterwards.
List(const LocationRange &loc, Ast &v);
// A list owns its elements through unique pointers, so it can be moved but
// not copied.
List(const List &l) = delete;
List(List &&l) noexcept = default;
TypeID getType() const override;
std::string toString() const override;
~List() = default;
/// LLVM-style RTTI hook based on `Expression::getType()`.
static bool classof(const Expression *e);
};
// ============================================================================
// String
// ============================================================================
struct String : public Expression {
/// The content of the string, owned by the node.
std::string data;
/// Stores a copy of \p v.
String(const LocationRange &loc, llvm::StringRef v);
/// Construct a string node from another string node.
String(String &s);
TypeID getType() const override;
/// Debug representation; only a short prefix of `data` is included.
std::string toString() const override;
~String() = default;
/// LLVM-style RTTI hook based on `Expression::getType()`.
static bool classof(const Expression *e);
};
// ============================================================================
// Keyword
// ============================================================================
struct Keyword : public Expression {
/// The keyword's name, owned by the node.
std::string name;
/// Stores a copy of \p name.
Keyword(const LocationRange &loc, llvm::StringRef name);
/// Construct a keyword from another keyword.
Keyword(Keyword &s);
TypeID getType() const override;
std::string toString() const override;
~Keyword() = default;
/// LLVM-style RTTI hook based on `Expression::getType()`.
static bool classof(const Expression *e);
};
// ============================================================================
// Error
// One way of representing errors is to just treat them as another type of node
// in the AST and the parser can generate them in case of any error or semantic
// analizer can do the same. At the time of processing the AST by the JIT
// or even anytime earlier we can just stop the execution and deal with the
// issue
// ============================================================================
struct Error : public Expression {
/// Human readable description of the problem.
std::string msg;
/// Keyword that classifies the error; owned by this node.
std::unique_ptr<Keyword> tag;
/// \param loc Source range the error refers to.
/// \param tag Classifying keyword; ownership is transferred to the node.
/// \param msg Message text; copied into the node.
Error(const LocationRange &loc, std::unique_ptr<Keyword> tag,
llvm::StringRef msg);
/// Construct an error node from another one. See the definition in ast.cpp
/// for how `tag` (a unique pointer) is carried over from \p e.
Error(Error &e);
TypeID getType() const override;
std::string toString() const override;
~Error() = default;
/// LLVM-style RTTI hook based on `Expression::getType()`.
static bool classof(const Expression *e);
};
/// Allocate a new node of type `T`, forwarding every argument to the
/// constructor of `T`, and hand it back as a generic `Node`. This is the
/// **official way** to create a new `Expression`. For example:
/// \code
/// auto sym = make<Symbol>(loc, "foo", "user");
/// \endcode
///
/// \param[args] Arguments of any type; they are perfectly forwarded to the
///              constructor of `T`.
/// \return A unique pointer to an `Expression` that owns the new `T`.
template <typename T, typename... Args>
Node make(Args &&...args) {
  Node node = std::make_unique<T>(std::forward<Args>(args)...);
  return node;
}
/// Allocate a new value of type `T`, forwarding every argument to the
/// constructor of `T`, and return it with its concrete type preserved (as
/// opposed to `make`, which returns a generic `Node`). For example:
/// \code
/// auto sym = makeAndCast<Symbol>(loc, "foo", "user");
/// \endcode
///
/// The object is created with `std::make_shared`, so the value and its
/// control block share one allocation (previously a `unique_ptr` was created
/// and then converted, costing a second allocation).
///
/// \param[args] Arguments of any type; they are perfectly forwarded to the
///              constructor of `T`.
/// \return A *shared* pointer to the new value of type `T`.
template <typename T, typename... Args>
std::shared_ptr<T> makeAndCast(Args &&...args) {
  return std::make_shared<T>(std::forward<Args>(args)...);
}
/// Helper that creates a new `Node` of type `T` (through `make`) and returns
/// it wrapped in a `MaybeNode`. Use it wherever a `MaybeNode` has to be
/// returned successfully.
template <typename T, typename... Args>
MaybeNode makeSuccessfulNode(Args &&...args) {
  return MaybeNode(make<T>(std::forward<Args>(args)...));
}
/// Helper that creates an `llvm::Error` by forwarding all the given arguments
/// to the constructor of the template parameter `E`, and returns it as the
/// failure state of an `llvm::Expected<T>`.
template <typename E, typename T = Node, typename... Args>
llvm::Expected<T> makeErrorful(Args &&...args) {
  return llvm::Expected<T>(llvm::make_error<E>(std::forward<Args>(args)...));
}
/// Helper that creates an `llvm::Error` by forwarding all the given arguments
/// to the constructor of the template parameter `E`, and returns it as a
/// failed `MaybeNode`. Shorthand for `makeErrorful<E, Node>(...)`.
template <typename E, typename... Args>
MaybeNode makeErrorNode(Args &&...args) {
  MaybeNode failure = makeErrorful<E, Node>(std::forward<Args>(args)...);
  return failure;
}
/// Converts the given AST to string and prints it out.
// NOTE(review): no definition of `dump` is visible in ast.cpp -- confirm where
// it is implemented.
void dump(Ast &);
} // namespace serene::ast
#endif

80
serene/src/environment.h Normal file
View File

@ -0,0 +1,80 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef ENVIRONMENT_H
#define ENVIRONMENT_H
#include "utils.h"
#include <llvm/ADT/StringMap.h>
#include <llvm/ADT/StringRef.h>
#include <mlir/Support/LogicalResult.h>
#include <optional>
#include <utility>
namespace serene {
/// This class represents a classic lisp environment (or scope) that holds the
/// bindings from string keys to values of type `V`. For example an
/// environment of symbol names to expressions would be `Environment<Node>`.
///
/// Environments form a chain through `parent`: a lookup that misses in the
/// local storage continues in the parent environment, if there is one. The
/// parent is referenced, not owned.
template <typename V>
class Environment {
  Environment<V> *parent;

  using StorageType = llvm::StringMap<V>;

  // The actual bindings storage
  StorageType pairs;

public:
  /// Create a root environment (no parent).
  Environment() : parent(nullptr) {}

  /// Create an environment whose failed lookups fall back to \p parent.
  explicit Environment(Environment *parent) : parent(parent) {}

  /// Look up the given `key` in this environment and then, on a miss, in the
  /// chain of parents.
  ///
  /// Presence is decided by the key being stored, not by the stored value:
  /// the previous implementation tested the truthiness of the value, so a
  /// binding to a "falsy" value (0, nullptr, ...) was reported as missing and
  /// could not shadow a parent binding. It also required `V` to be
  /// convertible to bool.
  ///
  /// \return A copy of the bound value, or `std::nullopt` if `key` is not
  ///         bound anywhere in the chain.
  std::optional<V> lookup(llvm::StringRef key) {
    auto entry = pairs.find(key);
    if (entry != pairs.end()) {
      return entry->getValue();
    }

    if (parent != nullptr) {
      return parent->lookup(key);
    }

    return std::nullopt;
  }

  /// Insert the given `key` with the given `value` into the storage,
  /// overwriting any value already bound to `key` in this environment. This
  /// operation will shadow an already existing `key` in the parent
  /// environment.
  ///
  /// \return Always `mlir::success()`.
  mlir::LogicalResult insert_symbol(llvm::StringRef key, V value) {
    // `value` is our own copy already; move it into the map instead of
    // copying it a second time.
    auto result = pairs.insert_or_assign(key, std::move(value));
    UNUSED(result);
    return mlir::success();
  }

  // Iteration over the bindings stored directly in this environment (parents
  // are not visited).
  inline typename StorageType::iterator begin() { return pairs.begin(); }

  inline typename StorageType::iterator end() { return pairs.end(); }

  inline typename StorageType::const_iterator begin() const {
    return pairs.begin();
  }

  inline typename StorageType::const_iterator end() const {
    return pairs.end();
  }
};
} // namespace serene
#endif

View File

@ -119,6 +119,15 @@ class JIT {
llvm::Error createCurrentProcessJD();
// Anonymous function counter. We need to assign a unique name to each
// anonymous function and we use this counter to generate those names
std::atomic<uint> fn_counter = 0;
// Since indexing namespaces by the name would be inefficient, We use
// unsigned integer and assign a number to all the namespaces at the
// creation time. Namespace IDs have to be unique.
std::atomic<uint> ns_counter = 0;
public:
JIT(llvm::orc::JITTargetMachineBuilder &&jtmb, std::unique_ptr<Options> opts);
static MaybeJIT make(llvm::orc::JITTargetMachineBuilder &&jtmb,
@ -151,6 +160,8 @@ public:
void setLoadPaths(std::vector<const char *> &dirs) { loadPaths.swap(dirs); };
/// Return the load paths for namespaces
llvm::ArrayRef<const char *> getLoadPaths() { return loadPaths; };
const Options &getOptions() const { return *options; };
};
MaybeJIT makeJIT(std::unique_ptr<Options> opts);

88
serene/src/location.h Normal file
View File

@ -0,0 +1,88 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LOCATION_H
#define LOCATION_H
#include <mlir/IR/Diagnostics.h>
#include <mlir/IR/Location.h>
#include <string>
namespace serene {
/// It represents a location in the input string to the parser via `line`
/// and `col`, together with a pointer to the character itself.
///
/// `ns` and `filename` are `llvm::StringRef`s, i.e. non-owning views: the
/// strings they refer to have to outlive the `Location`.
struct Location {
/// Since namespaces are our unit of compilation, we need to have
/// a namespace in hand
llvm::StringRef ns;
/// Name of the file the location belongs to, if there is one.
std::optional<llvm::StringRef> filename = std::nullopt;
/// A pointer to the character that this location is pointing to
/// in the input buffer
const char *c = nullptr;
/// At this stage we only support 65535 lines of code in each file
unsigned short int line = 0;
/// At this stage we only support 65535 chars in each line
unsigned short int col = 0;
/// False for locations created through `UnknownLocation`.
bool knownLocation = true;
/// String form of the location (defined out of line).
::std::string toString() const;
Location() = default;
/// Member-wise constructor; every argument is stored as given.
explicit Location(llvm::StringRef ns,
std::optional<llvm::StringRef> fname = std::nullopt,
const char *c = nullptr, unsigned short int line = 0,
unsigned short int col = 0, bool knownLocation = true)
: ns(ns), filename(fname), c(c), line(line), col(col),
knownLocation(knownLocation){};
/// Returns a copy of this location (defined out of line).
Location clone() const;
// mlir::Location toMLIRLocation(mlir::MLIRContext &ctx);
/// Returns an unknown location for the given \p ns.
static Location UnknownLocation(llvm::StringRef ns) {
return Location(ns, std::nullopt, nullptr, 0, 0, false);
}
};
class LocationRange {
public:
Location start;
Location end;
LocationRange() = default;
explicit LocationRange(Location _start) : start(_start), end(_start){};
LocationRange(Location _start, Location _end) : start(_start), end(_end){};
// LocationRange(const LocationRange &);
bool isKnownLocation() const { return start.knownLocation; };
static LocationRange UnknownLocation(llvm::StringRef ns) {
return LocationRange(Location::UnknownLocation(ns));
}
};
/// Advance the given location by one character, updating line/col with
/// respect to newlines. The second argument is a pointer to the current
/// character that the location has to point to.
void incLocation(Location &, const char *);
/// Counterpart of `incLocation`.
// NOTE(review): presumably moves the location back by one character; the
// definition is not visible here -- confirm.
void decLocation(Location &, const char *);
} // namespace serene
#endif

223
serene/src/namespace.cpp Normal file
View File

@ -0,0 +1,223 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "namespace.h"
#include "jit/jit.h"
#include <llvm/ADT/StringRef.h>
#include <llvm/Support/FormatVariadic.h>
#include <llvm/Support/raw_ostream.h>
#include <mlir/IR/Builders.h>
#include <mlir/IR/BuiltinOps.h>
#include <mlir/IR/Verifier.h>
#include <mlir/Support/LogicalResult.h>
#include <memory>
#include <stdexcept>
#include <string>
using namespace std;
using namespace llvm;
namespace serene {
/// Create a namespace bound to \p engine.
///
/// \param engine   The JIT engine this namespace keeps a reference to.
/// \param ns_name  Name of the namespace; copied into `name`.
/// \param filename Optional file name; copied into `filename` when present.
///
/// The root environment is created as part of the construction.
Namespace::Namespace(jit::JIT &engine, llvm::StringRef ns_name,
                     std::optional<llvm::StringRef> filename)
    : engine(engine), name(ns_name) {
  if (filename) {
    // Own the file name instead of keeping a view into the caller's buffer.
    this->filename = filename->str();
  }

  // The first environment ever created is the root environment.
  createEnv(nullptr);
}
/// Create a new environment whose parent is \p parent, store it in
/// `environments` (which owns it) and return a reference to it.
SemanticEnv &Namespace::createEnv(SemanticEnv *parent) {
  environments.push_back(std::make_unique<SemanticEnv>(parent));
  SemanticEnvPtr &created = environments.back();
  return *created;
}

/// Return the root environment, i.e. the first entry of `environments`.
/// Asserts that at least one environment has been created.
SemanticEnv &Namespace::getRootEnv() {
  assert(!environments.empty() && "Root env is not created!");
  SemanticEnvPtr &root = environments.front();
  return *root;
}
// mlir::LogicalResult Namespace::define(std::string &name, ast::Node &node) {
// auto &rootEnv = getRootEnv();
// if (failed(rootEnv.insert_symbol(name, node))) {
// return mlir::failure();
// }
// symbolList.push_back(name);
// return mlir::success();
// }
/// Return a mutable reference to the AST stored in this namespace.
ast::Ast &Namespace::getTree() {
  return tree;
}
/// Append every node of \p ast to the end of this namespace's tree.
/// Ownership of the nodes moves into the namespace and \p ast is left empty.
///
/// At the moment the nodes are stored as they are, whatever the compilation
/// phase is: the semantic analysis step below is still commented out.
///
/// \return Always `llvm::Error::success()` for now.
llvm::Error Namespace::ExpandTree(ast::Ast &ast) {
  // If the target phase is just parsing we don't want to run the semantic
  // analyzer or anything beyond the parser; the raw AST is all we need.
  const bool parseOnly =
      engine.getOptions().compilationPhase == CompilationPhase::Parse;

  // Both paths currently store the raw AST (the non-parse one "just for now").
  auto first = std::make_move_iterator(ast.begin());
  auto last  = std::make_move_iterator(ast.end());
  tree.insert(tree.end(), first, last);
  ast.clear();

  if (parseOnly) {
    return llvm::Error::success();
  }

  // auto &rootEnv = getRootEnv();
  // auto state = semantics::makeAnalysisState(*this, rootEnv);
  // // Run the semantic analyer on the ast and then if everything
  // // is ok add the form to the tree and forms
  // auto maybeForm = semantics::analyze(*state, ast);
  // if (!maybeForm) {
  // return maybeForm.takeError();
  // }
  // auto semanticAst = std::move(*maybeForm);
  // this->tree.insert(this->tree.end(), semanticAst.begin(),
  // semanticAst.end());
  return llvm::Error::success();
}
// MaybeModuleOp Namespace::generate(unsigned offset) {
// // The reason why we return an optional value instead of Errors
// // is the way MLIR's diagnostic engine works. Passes may use
// // the `emit` function of operations to report errors to the
// // diagnostic engine. So we can't return any error diractly.
// mlir::OpBuilder builder(&ctx.mlirContext);
// // TODO: Fix the unknown location by pointing to the `ns` form
// auto module = mlir::ModuleOp::create(builder.getUnknownLoc(),
// std::optional<llvm::StringRef>(name));
// auto treeSize = getTree().size();
// // Walk the AST and call the `generateIR` function of each node.
// // Since nodes will have access to the a reference of the
// // namespace they can use the builder and keep adding more
// // operations to the module via the builder
// for (unsigned i = offset; i < treeSize; ++i) {
// auto &node = getTree()[i];
// node->generateIR(*this, module);
// }
// if (mlir::failed(mlir::verify(module))) {
// module.emitError("Can't verify the module");
// module.erase();
// return llvm::None;
// }
// if (mlir::failed(runPasses(module))) {
// // TODO: Report a proper error
// module.emitError("Failure in passes!");
// module.erase();
// return llvm::None;
// }
// return MaybeModuleOp(module);
// }
// mlir::LogicalResult Namespace::runPasses(mlir::ModuleOp &m) {
// return ctx.pm.run(m);
// };
// void Namespace::dump() {
// llvm::outs() << "\nMLIR: \n";
// auto maybeModuleOp = generate();
// if (!maybeModuleOp) {
// llvm::errs() << "Failed to generate the IR.\n";
// return;
// }
// mlir::OpPrintingFlags flags;
// flags.enableDebugInfo();
// maybeModuleOp.getValue()->print(llvm::outs(), flags);
// };
// MaybeModule Namespace::compileToLLVM() {
// // The reason why we return an optional value instead of Errors
// // is the way MLIR's diagnostic engine works. Passes may use
// // the `emit` function of operations to report errors to the
// // diagnostic engine. So we can't return any error diractly.
// auto maybeModule = generate();
// if (!maybeModule) {
// NAMESPACE_LOG("IR generation failed for '" << name << "'");
// return llvm::None;
// }
// if (ctx.getTargetPhase() >= CompilationPhase::IR) {
// mlir::ModuleOp module = maybeModule.getValue().get();
// return ::serene::slir::compileToLLVMIR(ctx, module);
// }
// return llvm::None;
// };
// MaybeModule Namespace::compileToLLVMFromOffset(unsigned offset) {
// // The reason why we return an optional value instead of Errors
// // is the way MLIR's diagnostic engine works. Passes may use
// // the `emit` function of operations to report errors to the
// // diagnostic engine. So we can't return any error diractly.
// auto maybeModule = generate(offset);
// if (!maybeModule) {
// NAMESPACE_LOG("IR generation failed for '" << name << "'");
// return llvm::None;
// }
// if (ctx.getTargetPhase() >= CompilationPhase::IR) {
// mlir::ModuleOp module = maybeModule.getValue().get();
// return ::serene::slir::compileToLLVMIR(ctx, module);
// }
// return llvm::None;
// };
/// Factory: allocate a namespace with the given \p name and optional
/// \p filename, bound to \p engine, and return the unique pointer owning it.
NSPtr Namespace::make(jit::JIT &engine, llvm::StringRef name,
                      std::optional<llvm::StringRef> filename) {
  NSPtr ns = std::make_unique<Namespace>(engine, name, filename);
  return ns;
}

/// Only logs the destruction for now.
Namespace::~Namespace() {
  // TODO: Clean up anything related to this namespace in the context
  // TODO: Remove anything related to this namespace in the JIT
  NAMESPACE_LOG("Destructing NS: " << name);
}
} // namespace serene

140
serene/src/namespace.h Normal file
View File

@ -0,0 +1,140 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Commentary:
* Rules of a namespace:
* - A namespace has to have a name and it has to own it.
* - A namespace may or may not be associated with a file
* - The internal AST of a namespace is an evergrowing tree which may expand at
* any given time. For example via iteration of a REPL
* - `environments` vector is the owner of all the semantic envs
* - The first env in the `environments` is the root env.
*
* How to create a namespace ?
* The official way to create a namespace object is to use the `SereneContext`
* object and call `readNamespace`, `importNamespace` or `makeNamespace`.
*/
// TODO: Add a mechanism to figure out whether a namespace has changed or not
// either on memory or disk
#ifndef NAMESPACE_H
#define NAMESPACE_H
#include "ast.h"
#include "environment.h"
#include "utils.h"
#include <llvm/ADT/SmallString.h>
#include <llvm/ADT/SmallVector.h>
#include <llvm/ADT/StringRef.h>
#include <llvm/ADT/Twine.h>
#include <llvm/ExecutionEngine/Orc/Core.h>
#include <llvm/ExecutionEngine/Orc/ThreadSafeModule.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/Error.h>
#include <mlir/IR/Builders.h>
#include <mlir/IR/BuiltinOps.h>
#include <mlir/IR/OwningOpRef.h>
#include <mlir/IR/Value.h>
#include <mlir/Support/LogicalResult.h>
#include <atomic>
#include <cstddef>
#include <memory>
#include <string>
// Debug logging helper for namespace related code: streams the arguments,
// followed by a newline, to `llvm::dbgs()` under the "NAMESPACE" debug type.
// NOTE(review): the expansion already ends with `;`, so `NAMESPACE_LOG(x);`
// produces an extra empty statement (a problem in an unbraced if/else), and
// `DEBUG_WITH_TYPE` presumably needs <llvm/Support/Debug.h>, which is not
// included here directly -- confirm.
#define NAMESPACE_LOG(...) \
DEBUG_WITH_TYPE("NAMESPACE", llvm::dbgs() << __VA_ARGS__ << "\n");
namespace serene {
namespace jit {
class JIT;
} // namespace jit
class Namespace;
using NSPtr = std::unique_ptr<Namespace>;
using MaybeNS = llvm::Expected<NSPtr>;
using SemanticEnv = Environment<ast::Node>;
using SemanticEnvPtr = std::unique_ptr<SemanticEnv>;
using SemanticEnvironments = std::vector<SemanticEnvPtr>;
/// Serene's namespaces are the unit of compilation. Any code that needs to be
/// compiled has to be in a namespace. The official way to create a new
/// namespace is to use the `readNamespace`, `importNamespace` and
/// `makeNamespace` member functions of `SereneContext`.
// NOTE(review): the constructor and `make` below take a `jit::JIT` engine, not
// a `SereneContext` -- confirm whether the paragraph above is still accurate.
class Namespace {
/// The JIT engine this namespace belongs to (referenced, not owned).
jit::JIT &engine;
/// The content of the namespace. It should always hold a semantically
/// correct AST. It means that the AST that we want to store here has
/// to pass the semantic analyzer checks.
ast::Ast tree;
/// Owner of all the semantic environments; the first one is the root env.
SemanticEnvironments environments;
/// Names returned by `getSymList`. These are non-owning string views.
std::vector<llvm::StringRef> symbolList;
public:
/// The name of the namespace, owned by the namespace.
std::string name;
/// The file associated with the namespace, if there is one.
std::optional<std::string> filename;
/// Create a new namespace with the given `name` and optional `filename`,
/// bound to the given \p engine, and return a unique pointer to it.
static NSPtr make(jit::JIT &engine, llvm::StringRef name,
std::optional<llvm::StringRef> filename);
/// Construct a namespace; prefer `make` when a `NSPtr` is needed.
Namespace(jit::JIT &engine, llvm::StringRef ns_name,
std::optional<llvm::StringRef> filename);
/// Create a new environment with the given \p parent as the parent,
/// push the environment to the internal environment storage and
/// return a reference to it. The namespace itself is the owner of
/// environments.
SemanticEnv &createEnv(SemanticEnv *parent);
/// Return a reference to the top level (root) environment of ns.
SemanticEnv &getRootEnv();
/// Define a new binding in the root environment with the given \p name
/// and the given \p node. Defining a new binding with a name that
/// already exists is legal and will overwrite the previous binding and
/// the given name will point to a new value from now on.
// NOTE(review): the definition of `define` in namespace.cpp is commented
// out, so calling it will not link -- confirm.
mlir::LogicalResult define(std::string &name, ast::Node &node);
/// Add the given \p ast to the namespace and return any possible error.
/// The given \p ast will be added to a vector of ASTs by expanding
/// the tree vector to contain \p ast.
///
/// This function is meant to run the semantic analyzer on the \p ast as
/// well; in namespace.cpp that step is currently commented out.
llvm::Error ExpandTree(ast::Ast &ast);
/// Return a mutable reference to the stored AST.
ast::Ast &getTree();
/// Return the list of symbol names.
const std::vector<llvm::StringRef> &getSymList() { return symbolList; };
// Dumps the namespace with respect to the compilation phase
// void dump();
~Namespace();
};
} // namespace serene
#endif

View File

@ -65,7 +65,7 @@ struct Options {
// appropriate code for the host. If the same function has to be part
// of the runtime, then we use `targetTriple` again to generate the code
// for the target platform. So, we might end up with two version of the
// same function
// same function.
const llvm::Triple hostTriple;
CompilationPhase compilationPhase = CompilationPhase::NoOptimization;

431
serene/src/reader.cpp Normal file
View File

@ -0,0 +1,431 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "reader.h"
// #include "serene/errors.h"
// #include "serene/exprs/expression.h"
// #include "serene/exprs/list.h"
// #include "serene/exprs/number.h"
// #include "serene/exprs/symbol.h"
// #include "serene/namespace.h"
// #include "serene/utils.h"
#include <llvm/ADT/StringRef.h>
#include <llvm/Support/Error.h>
#include <llvm/Support/ErrorHandling.h>
#include <llvm/Support/ErrorOr.h>
#include <llvm/Support/FormatVariadic.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/SMLoc.h>
#include <mlir/IR/Diagnostics.h>
#include <mlir/IR/Location.h>
#include <mlir/IR/MLIRContext.h>
#include <mlir/Support/LogicalResult.h>
#include <assert.h>
#include <cctype>
#include <fstream>
#include <memory>
#include <string>
namespace serene {
namespace reader {
// LocationRange::LocationRange(const LocationRange &loc) {
// start = loc.start.clone();
// end = loc.end.clone();
// }
/// Render this location as text in the form `<line>:<col>`.
std::string Location::toString() const {
  auto formatted = llvm::formatv("{0}:{1}", line, col);
  return formatted.str();
}
/// Build a new `Location` carrying the same namespace, filename, char
/// pointer, line, column and `knownLocation` flag as this one.
Location Location::clone() const {
  return Location{this->ns,   this->filename, this->c,
                  this->line, this->col,      this->knownLocation};
}
/// Move the given location forward by one character, in place.
///
/// If `*c` is a newline the line counter is incremented and the column is
/// reset to 0; for any other character only the column is incremented.
/// \param loc The `Location` data to update
/// \param c   A pointer to the current char that the location has to point to
void incLocation(Location &loc, const char *c) {
  // TODO: Handle the end of line with respect to the OS.
  if (*c == '\n') {
    loc.line++;
    loc.col = 0;
    return;
  }

  loc.col++;
}
/// Move the given location backward by one character, in place.
///
/// If `*c` is a newline the line counter is decremented, otherwise the column
/// is decremented. Neither counter is ever taken below zero.
/// \param loc The `Location` data to update
/// \param c   A pointer to the current char that the location has to point to
void decLocation(Location &loc, const char *c) {
  // TODO: Handle the end of line with respect to the OS.
  if (*c != '\n') {
    if (loc.col != 0) {
      loc.col -= 1;
    }
    return;
  }

  // Stepping back over a newline: the column of the previous line is not
  // known here, so `col` is deliberately left untouched.
  if (loc.line != 0) {
    loc.line -= 1;
  }
}
/// Create a reader over the given string `buffer` for the namespace `ns`.
///
/// The cursor is parked one position *before* the first character of the
/// buffer so that the first call to `advanceByOne` lands on `buf.begin()`.
/// Line and column counters start at 1.
///
/// NOTE(review): the declaration in reader.h takes no `SereneContext`
/// parameter and the class declares no `ctx` member -- confirm which
/// signature is the current one.
Reader::Reader(SereneContext &ctx, llvm::StringRef buffer, llvm::StringRef ns,
               std::optional<llvm::StringRef> filename)
    : ctx(ctx), ns(ns), filename(filename), buf(buffer),
      currentLocation(Location(ns, filename)) {
  UNUSED(this->ctx);
  READER_LOG("Setting the first char of the buffer");

  // Human readable positions are 1-based.
  currentLocation.col  = 1;
  currentLocation.line = 1;

  // Nothing has been consumed yet: sit right before the first char.
  currentPos  = 1;
  currentChar = buf.begin() - 1;
}
/// Create a reader over the contents of the given memory buffer by
/// delegating to the `llvm::StringRef` based constructor.
Reader::Reader(SereneContext &ctx, llvm::MemoryBufferRef buffer,
               llvm::StringRef ns, std::optional<llvm::StringRef> filename)
    : Reader(ctx, buffer.getBuffer(), ns, filename) {}
/// Only emits a debug log entry; the reader does not own the buffer it reads.
Reader::~Reader() {
  READER_LOG("Destroying the reader");
}
/// Move the cursor exactly one character forward and update the current
/// location accordingly.
///
/// The line counter is bumped lazily: a newline only marks `readEOL`, and the
/// line/column values are switched to the next line when the cursor moves on
/// to the character *after* that newline.
void Reader::advanceByOne() {
  ++currentChar;
  ++currentPos;
  ++currentLocation.col;

  const bool onNewline = (*currentChar == '\n');
  if (onNewline) {
    READER_LOG("Detected end of line");
  }

  // The previous character was a newline, so the char we are on now is the
  // first one of a new line (this holds for consecutive newlines as well).
  if (readEOL) {
    currentLocation.line++;
    currentLocation.col = 1;
  }
  readEOL = onNewline;

  READER_LOG("Moving to Char: " << *currentChar << " at location: "
                                << currentLocation.toString());
}
/// Move the cursor forward.
///
/// With `skipWhitespace == false` the cursor moves exactly one character.
/// With `skipWhitespace == true` the cursor moves over the run of whitespace
/// characters that follows it and stops on the last whitespace char, i.e.
/// right before the next non-whitespace character. It does not move at all if
/// the next character is not whitespace.
/// \param skipWhitespace whether to consume a run of whitespace instead of a
///                       single character
void Reader::advance(bool skipWhitespace) {
  if (!skipWhitespace) {
    advanceByOne();
    return;
  }

  // The <cctype> predicates have undefined behaviour for negative values
  // other than EOF, which is what a plain `char` holding a non-ASCII byte
  // may be. Go through `unsigned char` first.
  while (isspace(static_cast<unsigned char>(*(currentChar + 1))) != 0) {
    advanceByOne();
  }
}
/// Look ahead in the buffer without moving the cursor.
///
/// \param skipWhitespace when false, return a pointer to the char `count`
///                       positions after the current one. When true, `count`
///                       is ignored and a pointer to the first non-whitespace
///                       char after the current one is returned.
/// \param count          look-ahead distance (only used when
///                       `skipWhitespace` is false)
/// \return a pointer into the buffer; the caller is expected to pass it to
///         `isEndOfBuffer` before relying on the value it points to.
const char *Reader::nextChar(bool skipWhitespace, unsigned count) {
  if (!skipWhitespace) {
    READER_LOG("Next char: " << *(currentChar + count));
    return currentChar + count;
  }

  const auto *c = currentChar + 1;
  // Cast through `unsigned char`: passing a negative `char` (any non-ASCII
  // byte on platforms where char is signed) to isspace is undefined.
  while (isspace(static_cast<unsigned char>(*c)) != 0) {
    c++;
  }

  READER_LOG("Next char: " << *c);
  return c;
}
/// Tell whether the given look-ahead pointer `c` marks the end of the input.
///
/// The positional checks are done *before* `c` is dereferenced, because the
/// buffer is not guaranteed to be null terminated and reading at or past
/// `buf.end()` would be an out-of-bounds access.
/// \param c a pointer obtained from `nextChar`
/// \return true if the cursor or `c` is past the last char of the buffer, or
///         if `*c` is a NUL byte or compares equal to `EOF`.
bool Reader::isEndOfBuffer(const char *c) {
  if (currentPos > buf.size() || c >= buf.end()) {
    return true;
  }

  return *c == '\0' || static_cast<int>(*c) == EOF;
}
/// Return a copy (made through `Location::clone`) of the location the cursor
/// currently points to.
Location Reader::getCurrentLocation() {
  return this->currentLocation.clone();
}
/// A predicate function indicating whether the given char `c` may appear in
/// a symbol or not.
///
/// Accepted are ASCII letters and digits (as classified by `std::isalnum`)
/// plus the punctuation characters listed in the switch below.
/// \param c the character to classify
/// \return true if `c` is acceptable in an identifier
bool Reader::isValidForIdentifier(char c) {
  switch (c) {
  case '!':
  case '$':
  case '%':
  case '&':
  case '*':
  case '+':
  case '-':
  case '.':
  case '~':
  case '/':
  case ':':
  case '<':
  case '=':
  case '>':
  case '?':
  case '@':
  case '^':
  case '_':
    return true;
  default:
    break;
  }

  // std::isalnum requires a value representable as `unsigned char` (or EOF);
  // a plain `char` holding a non-ASCII byte may be negative, which is
  // undefined behaviour without the cast.
  return std::isalnum(static_cast<unsigned char>(c)) != 0;
}
/// Reads a number (a run of digits with at most one '.').
///
/// The cursor is expected to sit right before the first digit. Reading stops
/// at the first char that is neither a digit nor a '.'.
/// \param neg whether to read a negative number or not. The leading '-' has
///            already been consumed by the caller.
/// \return a `Number` node on success. An error is returned if the first
///         char is not a digit, if a second '.' shows up, or if the number is
///         immediately followed by an alphabetic char.
exprs::MaybeNode Reader::readNumber(bool neg) {
  READER_LOG("Reading a number...");

  std::string number(neg ? "-" : "");
  bool floatNum = false;

  const auto *c = nextChar();
  advance();

  LocationRange loc(getCurrentLocation());

  // NOTE: every <cctype> call below goes through `unsigned char`, since
  // passing a negative `char` to these functions is undefined behaviour.
  if (isdigit(static_cast<unsigned char>(*c)) == 0) {
    return errors::makeError(ctx, errors::InvalidDigitForNumber, loc);
  }

  for (;;) {
    number += *c;
    c = nextChar(false);

    const bool isPoint = (*c == '.');
    if (!isPoint && isdigit(static_cast<unsigned char>(*c)) == 0) {
      break;
    }

    if (isPoint) {
      if (floatNum) {
        loc = LocationRange(getCurrentLocation());
        return errors::makeError(ctx, errors::TwoFloatPoints, loc);
      }
      floatNum = true;
    }

    advance();
  }

  // Something like `12ab` is not a number.
  if (std::isalpha(static_cast<unsigned char>(*c)) != 0) {
    advance();
    loc.start = getCurrentLocation();
    return errors::makeError(ctx, errors::InvalidDigitForNumber, loc);
  }

  loc.end = getCurrentLocation();
  return exprs::make<exprs::Number>(loc, number, neg, floatNum);
}
/// Reads a symbol. If the symbol looks like a number (it starts with a digit,
/// or with a '-' that is directly followed by a digit) it is read as a number
/// instead.
///
/// \return a `Symbol` node (or whatever `readNumber` returns) on success, or
///         an `InvalidCharacterForSymbol` error if the next char can't start
///         a symbol.
exprs::MaybeNode Reader::readSymbol() {
  READER_LOG("Reading a symbol...");
  LocationRange loc;
  const auto *c = nextChar();

  // Check for the end of the buffer first so that `*c` is only inspected
  // once we know there is something to look at.
  if (isEndOfBuffer(c) || !this->isValidForIdentifier(*c) ||
      (isspace(static_cast<unsigned char>(*c)) != 0)) {
    advance();
    loc = LocationRange(getCurrentLocation());
    std::string msg;

    if (*c == ')') {
      msg = "An extra ')' is detected.";
    }

    return errors::makeError(ctx, errors::InvalidCharacterForSymbol, loc, msg);
  }

  if (*c == '-') {
    const auto *next = nextChar(false, 2);
    if (isdigit(static_cast<unsigned char>(*next)) != 0) {
      // Swallow the -
      advance();
      return readNumber(true);
    }
  }

  if (isdigit(static_cast<unsigned char>(*c)) != 0) {
    return readNumber(false);
  }

  std::string sym;
  advance();

  // The cursor now sits on the first char of the symbol: record it as the
  // start of the range, otherwise only `loc.end` would ever be populated.
  loc.start = getCurrentLocation();

  for (;;) {
    sym += *c;
    c = nextChar();

    if (!isEndOfBuffer(c) &&
        (isspace(static_cast<unsigned char>(*c)) == 0) &&
        this->isValidForIdentifier(*c)) {
      advance();
      continue;
    }
    break;
  }

  // TODO: Make sure that the symbol has 0 or 1 '/'.
  // TODO: Make sure that `/` is not at the start or at the end of the symbol
  loc.end = getCurrentLocation();
  return exprs::makeSuccessfulNode<exprs::Symbol>(loc, sym, this->ns);
}
/// Reads a list recursively.
///
/// The cursor is expected to sit right before the opening '('. Elements are
/// read through `readExpr` until the matching ')' is found.
/// \return the list node on success, the first error produced by an element,
///         or an `EOFWhileScaningAList` error if the buffer ends before the
///         list is closed.
exprs::MaybeNode Reader::readList() {
  READER_LOG("Reading a list...");

  const auto *opening = nextChar();
  advance();

  auto list = exprs::makeAndCast<exprs::List>(getCurrentLocation());

  // TODO: Replace the assert with an actual check.
  assert(*opening == '(');

  for (;;) {
    const auto *peeked = nextChar(true);

    if (isEndOfBuffer(peeked)) {
      advance(true);
      advance();
      list->location.end = getCurrentLocation();
      return errors::makeError(ctx, errors::EOFWhileScaningAList,
                               list->location);
    }

    // Whatever comes next, skip the whitespace in front of it first.
    advance(true);

    if (*peeked == ')') {
      // Consume the closing paren and finish the list.
      advance();
      list->location.end = getCurrentLocation();
      return list;
    }

    auto expr = readExpr();
    if (!expr) {
      return expr;
    }

    list->append(*expr);
  }
}
/// Reads an expression by dispatching to the proper reader function: a '('
/// starts a list, anything else is handed to `readSymbol`.
///
/// \return `exprs::EmptyNode` if the end of the buffer is reached, otherwise
///         whatever the selected reader function returns.
exprs::MaybeNode Reader::readExpr() {
  const auto *peeked = nextChar(true);
  READER_LOG("Read char at `readExpr`: " << *peeked);

  if (isEndOfBuffer(peeked)) {
    return exprs::EmptyNode;
  }

  // Skip any whitespace in front of the expression.
  advance(true);

  if (*peeked == '(') {
    return readList();
  }

  return readSymbol();
}
/// Reads all the expressions in the reader's buffer as an AST.
/// Each expression type (from the reader perspective) has a
/// reader function.
///
/// Reading stops at the end of the buffer or when `readExpr` yields an empty
/// node. The collected AST is moved out of the reader.
/// \return the AST, or the first error returned by `readExpr`.
exprs::MaybeAst Reader::read() {
  // An empty buffer has nothing to read; otherwise loop until one of the
  // exits below is taken.
  while (!buf.empty()) {
    const auto *peeked = nextChar(true);
    if (isEndOfBuffer(peeked)) {
      break;
    }

    advance(true);

    auto maybeExpr = readExpr();
    if (!maybeExpr) {
      return maybeExpr.takeError();
    }

    if (*maybeExpr == nullptr) {
      break;
    }

    this->ast.push_back(std::move(*maybeExpr));
  }

  return std::move(this->ast);
}
/// Convenience wrapper: build a `Reader` over the string `input` for the
/// namespace `ns` and return the result of `Reader::read`.
exprs::MaybeAst read(SereneContext &ctx, const llvm::StringRef input,
                     llvm::StringRef ns,
                     std::optional<llvm::StringRef> filename) {
  reader::Reader parser(ctx, input, ns, filename);
  auto result = parser.read();
  return result;
}
/// Convenience wrapper: build a `Reader` over the memory buffer `input` for
/// the namespace `ns` and return the result of `Reader::read`.
exprs::MaybeAst read(SereneContext &ctx, const llvm::MemoryBufferRef input,
                     llvm::StringRef ns,
                     std::optional<llvm::StringRef> filename) {
  reader::Reader parser(ctx, input, ns, filename);
  auto result = parser.read();
  return result;
}
} // namespace reader
} // namespace serene

112
serene/src/reader.h Normal file
View File

@ -0,0 +1,112 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Commentary:
* `Reader` is the base parser class and accepts a buffer like object (usually
* `llvm::StringRef`) as the input and parses it to create an AST (look at the
* `serene::exprs::Expression` class).
*
* The parsing algorithm is quite simple and it is a LL(2). It means that, we
* start parsing the input from the very first character and parse the input
* one char at a time till we reach the end of the input. Please note that
* when we call the `advance` function to move forward in the buffer, we
* can't go back. In order to look ahead in the buffer without moving in the
* buffer we use the `nextChar` method.
*
* We have dedicated methods to read different forms such as `list`, `symbol`,
* `number`, etc. Each of them returns a `MaybeNode` that contains the node in
* the success case and an `Error` in the failure case.
*/
#ifndef READER_H
#define READER_H
#include "ast.h"
#include "location.h"
#include <llvm/ADT/StringRef.h>
#include <llvm/Support/MemoryBufferRef.h>
namespace serene {
/// Base reader class which reads from a string directly.
///
/// The reader keeps a cursor (`currentChar`) into `buf` together with the
/// matching `Location`, and exposes a single public entry point, `read`.
///
/// NOTE(review): the definitions in reader.cpp take a `SereneContext &ctx`
/// constructor argument and use a `ctx` member that is not declared here --
/// confirm which side is up to date.
class Reader {
private:
// Name of the namespace being read; handed to the created locations and
// symbols.
llvm::StringRef ns;
// Optional name of the file the buffer comes from.
std::optional<llvm::StringRef> filename;
// The cursor: points to the character that was consumed last.
const char *currentChar = nullptr;
// The input that is being parsed. The reader does not own it.
llvm::StringRef buf;
/// The position tracker that we will use to determine the end of the
/// buffer since the buffer might not be null terminated
size_t currentPos = static_cast<size_t>(-1);
// Line/column information matching `currentChar`.
Location currentLocation;
// Set when the char under the cursor is a newline; the line counter is
// bumped when the cursor moves past it.
bool readEOL = false;
/// Returns a clone of the current location
Location getCurrentLocation();
/// Moves the cursor forward. It does not return anything; use `nextChar`
/// to look at the upcoming character.
/// @param skipWhitespace When false the cursor moves by exactly one char.
/// When true it moves over the whitespace chars that follow it and stops
/// right before the next non-whitespace char.
void advance(bool skipWhitespace = false);
/// Moves the cursor one char forward and updates `currentLocation`.
void advanceByOne();
/// Returns a pointer to an upcoming character without moving the cursor.
/// @param skipWhitespace When true, `count` is ignored and the pointer to
/// the first non-whitespace char after the cursor is returned.
/// @param count How many chars to look ahead when `skipWhitespace` is false.
const char *nextChar(bool skipWhitespace = false, unsigned count = 1);
/// Returns a boolean indicating whether the given input character is valid
/// for an identifier or not.
static bool isValidForIdentifier(char c);
// The property to store the ast tree
Ast ast;
// One reader function per form. Each of them returns the parsed node or an
// error.
MaybeNode readSymbol();
// The boolean tells whether the number to read is negative.
MaybeNode readNumber(bool);
MaybeNode readList();
MaybeNode readExpr();
// Tells whether the given look-ahead pointer marks the end of the input.
bool isEndOfBuffer(const char *);
public:
/// Creates a reader over the string \p buf for the namespace \p ns.
Reader(llvm::StringRef buf, llvm::StringRef ns,
std::optional<llvm::StringRef> filename);
/// Creates a reader over the content of the memory buffer \p buf for the
/// namespace \p ns.
Reader(llvm::MemoryBufferRef buf, llvm::StringRef ns,
std::optional<llvm::StringRef> filename);
// void setInput(const llvm::StringRef string);
/// Parses the input and creates a possible AST out of it or errors
/// otherwise.
MaybeAst read();
~Reader();
};
/// Parses the given `input` string for the namespace `ns` and returns a
/// `MaybeAst` which may contain an AST or an `llvm::Error`.
MaybeAst read(llvm::StringRef input, llvm::StringRef ns,
std::optional<llvm::StringRef> filename);
/// Same as above, but reads the input from a memory buffer.
MaybeAst read(llvm::MemoryBufferRef input, llvm::StringRef ns,
std::optional<llvm::StringRef> filename);
} // namespace serene
#endif

225
serene/src/source_mgr.cpp Normal file
View File

@ -0,0 +1,225 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "serene/source_mgr.h"
#include "serene/namespace.h"
#include "serene/reader/location.h"
#include "serene/reader/reader.h"
#include "serene/utils.h"
#include <system_error>
#include <llvm/Support/Error.h>
#include <llvm/Support/FormatVariadic.h>
#include <llvm/Support/Locale.h>
#include <llvm/Support/MemoryBufferRef.h>
#include <llvm/Support/Path.h>
#include <llvm/Support/raw_ostream.h>
#include <mlir/Support/LogicalResult.h>
namespace serene {
std::string SourceMgr::convertNamespaceToPath(std::string ns_name) {
std::replace(ns_name.begin(), ns_name.end(), '.', '/');
llvm::SmallString<MAX_PATH_SLOTS> path;
path.append(ns_name);
llvm::sys::path::native(path);
return std::string(path);
};
/// Buffer IDs are 1-based indexes into `buffers`; 0 is never a valid ID.
bool SourceMgr::isValidBufferID(unsigned i) const {
  if (i == 0) {
    return false;
  }

  return i <= buffers.size();
}
/// Look for the file of the namespace \p name in each of the load paths, in
/// order, and return the content of the first one that can be read.
///
/// \param name         the namespace name (dots are mapped to directories)
/// \param importedFile set to each candidate path that is tried; after a
///                     successful return it holds the path that was loaded
/// \return the memory buffer of the file, or nullptr if no load path
///         contains it.
SourceMgr::MemBufPtr SourceMgr::findFileInLoadPath(const std::string &name,
                                                   std::string &importedFile) {
  const auto relativePath = convertNamespaceToPath(name);

  for (const auto &dir : loadPaths) {
    // TODO: Ugh, Udgly, fix this using llvm::sys::path functions
    importedFile = dir + llvm::sys::path::get_separator().data() +
                   relativePath + "." + DEFAULT_SUFFIX;

    SMGR_LOG("Try to load the ns from: " + importedFile);
    auto bufOrErr = llvm::MemoryBuffer::getFile(importedFile);

    if (auto err = bufOrErr.getError()) {
      // Not readable from this load path, try the next one.
      llvm::consumeError(llvm::errorCodeToError(err));
      continue;
    }

    return std::move(*bufOrErr);
  }

  return nullptr;
}
/// Find the source file of the namespace \p name in the load paths, register
/// its buffer with the source manager, parse it and create the namespace out
/// of the resulting AST.
///
/// \param ctx       the context used to create errors and the namespace
/// \param name      the name of the namespace to load
/// \param importLoc the location where the namespace is imported
/// \return the namespace, or an error if the file can't be found, the buffer
///         can't be registered, the reader fails or the AST is rejected by
///         the namespace.
MaybeNS SourceMgr::readNamespace(SereneContext &ctx, std::string name,
                                 reader::LocationRange importLoc) {
  std::string importedFile;

  SMGR_LOG("Attempt to load namespace: " + name);
  MemBufPtr newBufOrErr(findFileInLoadPath(name, importedFile));

  if (newBufOrErr == nullptr) {
    auto msg = llvm::formatv("Couldn't find namespace '{0}'", name).str();
    return errors::makeError(ctx, errors::NSLoadError, importLoc, msg);
  }

  auto bufferId = AddNewSourceBuffer(std::move(newBufOrErr), importLoc);

  // Validate the id *before* indexing it: an invalid (0) id must never end
  // up in `nsTable`, since lookups treat 0 as "no such namespace".
  if (bufferId == 0) {
    auto msg = llvm::formatv("Couldn't add namespace '{0}'", name).str();
    return errors::makeError(ctx, errors::NSAddToSMError, importLoc, msg);
  }

  UNUSED(nsTable.insert_or_assign(name, bufferId));

  // Since we moved the buffer to be added as the source storage we
  // need to get a pointer to it again
  const auto *buf = getMemoryBuffer(bufferId);

  // Read the content of the buffer by passing it the reader
  auto maybeAst = reader::read(ctx, buf->getBuffer(), name,
                               std::optional(llvm::StringRef(importedFile)));

  if (!maybeAst) {
    SMGR_LOG("Couldn't Read namespace: " + name);
    return maybeAst.takeError();
  }

  // Create the NS and set the AST
  auto ns =
      ctx.makeNamespace(name, std::optional(llvm::StringRef(importedFile)));

  if (auto errs = ns->addTree(*maybeAst)) {
    SMGR_LOG("Couldn't set the AST for namespace: " + name);
    return errs;
  }

  return ns;
}
/// Take ownership of the memory buffer \p f, store it together with
/// \p includeLoc and return its ID, which is the number of buffers held
/// after the insertion (so IDs are 1-based).
unsigned SourceMgr::AddNewSourceBuffer(std::unique_ptr<llvm::MemoryBuffer> f,
                                       reader::LocationRange includeLoc) {
  SrcBuffer entry;
  entry.importLoc = includeLoc;
  entry.buffer    = std::move(f);

  buffers.push_back(std::move(entry));
  return buffers.size();
}
template <typename T>
static std::vector<T> &GetOrCreateOffsetCache(void *&offsetCache,
llvm::MemoryBuffer *buffer) {
if (offsetCache) {
return *static_cast<std::vector<T> *>(offsetCache);
}
// Lazily fill in the offset cache.
auto *offsets = new std::vector<T>();
size_t sz = buffer->getBufferSize();
// TODO: Replace this assert with a realtime check
assert(sz <= std::numeric_limits<T>::max());
llvm::StringRef s = buffer->getBuffer();
for (size_t n = 0; n < sz; ++n) {
if (s[n] == '\n') {
offsets->push_back(static_cast<T>(n));
}
}
offsetCache = offsets;
return *offsets;
}
/// Return a pointer to the first char of the line \p lineNo, or nullptr if
/// the buffer has no such line. Line numbers 0 and 1 both map to the start
/// of the buffer.
/// \tparam T the integer type of the entries of the offset cache
template <typename T>
const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
    unsigned lineNo) const {
  std::vector<T> &lineEnds =
      GetOrCreateOffsetCache<T>(offsetCache, buffer.get());

  // We start counting line and column numbers from 1.
  const unsigned index = (lineNo == 0) ? 0 : lineNo - 1;

  const char *bufStart = buffer->getBufferStart();

  // The first line starts where the buffer starts.
  if (index == 0) {
    return bufStart;
  }

  if (index > lineEnds.size()) {
    return nullptr;
  }

  // The offset cache contains the location of the \n for the specified line,
  // we want the start of the line. As such, we look at the previous entry
  // and step over its newline.
  return bufStart + lineEnds[index - 1] + 1;
}
/// Return a pointer to the first character of the specified line number or
/// null if the line number is invalid.
///
/// The element type of the offset cache is the narrowest unsigned integer
/// type that can hold the size of the buffer.
const char *
SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned lineNo) const {
  const size_t sz = buffer->getBufferSize();

  if (sz > std::numeric_limits<uint32_t>::max()) {
    return getPointerForLineNumberSpecialized<uint64_t>(lineNo);
  }

  if (sz > std::numeric_limits<uint16_t>::max()) {
    return getPointerForLineNumberSpecialized<uint32_t>(lineNo);
  }

  if (sz > std::numeric_limits<uint8_t>::max()) {
    return getPointerForLineNumberSpecialized<uint16_t>(lineNo);
  }

  return getPointerForLineNumberSpecialized<uint8_t>(lineNo);
}
/// Move constructor: take over the memory buffer and the offset cache of
/// \p other. The cache pointer of \p other is reset so that its destructor
/// does not delete the cache we now own.
SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&other) noexcept
    : buffer(std::move(other.buffer)),
      offsetCache(std::exchange(other.offsetCache, nullptr)),
      importLoc(other.importLoc) {}
/// Delete the offset cache, if there is one. The cache is stored type-erased,
/// so the element type is picked from the size of the buffer again, with the
/// same thresholds `getPointerForLineNumber` uses.
SourceMgr::SrcBuffer::~SrcBuffer() {
  if (offsetCache == nullptr) {
    return;
  }

  const size_t sz = buffer->getBufferSize();

  if (sz > std::numeric_limits<uint32_t>::max()) {
    delete static_cast<std::vector<uint64_t> *>(offsetCache);
  } else if (sz > std::numeric_limits<uint16_t>::max()) {
    delete static_cast<std::vector<uint32_t> *>(offsetCache);
  } else if (sz > std::numeric_limits<uint8_t>::max()) {
    delete static_cast<std::vector<uint16_t> *>(offsetCache);
  } else {
    delete static_cast<std::vector<uint8_t> *>(offsetCache);
  }

  offsetCache = nullptr;
}
}; // namespace serene

190
serene/src/source_mgr.h Normal file
View File

@ -0,0 +1,190 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef SERENE_SOURCE_MGR_H
#define SERENE_SOURCE_MGR_H
#include "location.h"
#include <llvm/ADT/SmallVector.h>
#include <llvm/ADT/StringMap.h>
#include <llvm/Support/ErrorHandling.h>
#include <llvm/Support/ErrorOr.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/SourceMgr.h>
#include <mlir/IR/Diagnostics.h>
#include <mlir/Support/Timing.h>
#include <memory>
#include <string>
// Debug logging helper of the source manager. It streams its arguments to
// `llvm::dbgs()` under the "sourcemgr" debug type, prefixed with "[SMGR]: "
// and followed by a newline. The expansion already ends with a semicolon.
#define SMGR_LOG(...) \
DEBUG_WITH_TYPE("sourcemgr", llvm::dbgs() \
<< "[SMGR]: " << __VA_ARGS__ << "\n");
namespace serene {
class SereneContext;
/// This class is quite similar to the `llvm::SourceMgr` in functionality. We
/// even borrowed some of the code from the original implementation but removed
/// a lot of code that was irrelevant to us.
///
/// SourceMgr is responsible for finding a namespace in the `loadPaths` and
/// reading the content of the `.srn` (or any of the `DEFAULT_SUFFIX`) into a
/// `llvm::MemoryBuffer` embedded in a `SrcBuffer` object as the owner of the
/// source files and then it will call the `reader` on the buffer to parse it
/// and create the actual `Namespace` object from the parsed AST.
///
/// Later on, whenever we need to refer to the source file of a namespace for
/// diagnosis purposes or any other purpose we can use the functions in this
/// class to get hold of a pointer to a specific `Location` of the
/// buffer.
///
/// Note: Unlike the original version, SourceMgr does not handle the diagnostics
/// and it uses the Serene's `DiagnosticEngine` for that matter.
class SourceMgr {
public:
// TODO: Make it a vector of supported suffixes
// The file extension (without the dot) of the namespace source files.
constexpr static const char *DEFAULT_SUFFIX = "srn";
private:
/// A source file owned by the source manager, together with its lazily
/// built line offset cache and the location it was imported from.
struct SrcBuffer {
/// The memory buffer for the file.
std::unique_ptr<llvm::MemoryBuffer> buffer;
/// Vector of offsets into Buffer at which there are line-endings
/// (lazily populated). Once populated, the '\n' that marks the end of
/// line number N from [1..] is at Buffer[OffsetCache[N-1]]. Since
/// these offsets are in sorted (ascending) order, they can be
/// binary-searched for the first one after any given offset (eg. an
/// offset corresponding to a particular SMLoc).
///
/// Since we're storing offsets into relatively small files (often smaller
/// than 2^8 or 2^16 bytes), we select the offset vector element type
/// dynamically based on the size of Buffer.
mutable void *offsetCache = nullptr;
/// Look up a given \p ptr in the buffer, determining which line it came
/// from.
/// NOTE(review): no definition of this function (or of its specialized
/// variant below) is visible in source_mgr.cpp -- confirm it exists.
unsigned getLineNumber(const char *ptr) const;
template <typename T>
unsigned getLineNumberSpecialized(const char *ptr) const;
/// Return a pointer to the first character of the specified line number or
/// null if the line number is invalid.
const char *getPointerForLineNumber(unsigned lineNo) const;
/// The implementation of `getPointerForLineNumber` for an offset cache
/// whose elements are of type `T`.
template <typename T>
const char *getPointerForLineNumberSpecialized(unsigned lineNo) const;
/// This is the location of the parent import or unknown location if it is
/// the main namespace
LocationRange importLoc;
// Move-only: the offset cache is a raw owning pointer that is released by
// the destructor, so copies are not allowed.
SrcBuffer() = default;
SrcBuffer(SrcBuffer &&) noexcept;
SrcBuffer(const SrcBuffer &) = delete;
SrcBuffer &operator=(const SrcBuffer &) = delete;
~SrcBuffer();
};
using MemBufPtr = std::unique_ptr<llvm::MemoryBuffer>;
/// This is all of the buffers that we are reading from.
std::vector<SrcBuffer> buffers;
/// A hashtable that works as an index from namespace names to the buffer
/// ID (the 1-based position) in `buffers`
llvm::StringMap<unsigned> nsTable;
// This is the list of directories we should search for include files in.
std::vector<std::string> loadPaths;
// Find a namespace file with the given \p name in the load path and return
// a unique pointer to the memory buffer containing the content, or a null
// pointer if the file can't be found in any of the load paths.
// In the success case it will put the path of the file into the \p
// importedFile.
MemBufPtr findFileInLoadPath(const std::string &name,
std::string &importedFile);
// Buffer IDs are 1-based; 0 is never valid.
bool isValidBufferID(unsigned i) const;
/// Converts the ns name to a partial path by replacing the dots with slashes
static std::string convertNamespaceToPath(std::string ns_name);
public:
SourceMgr() = default;
SourceMgr(const SourceMgr &) = delete;
SourceMgr &operator=(const SourceMgr &) = delete;
SourceMgr(SourceMgr &&) = default;
SourceMgr &operator=(SourceMgr &&) = default;
~SourceMgr() = default;
/// Set the `loadPaths` to the given \p dirs. `loadPaths` is a vector of
/// directories that Serene will look in, in order, to find a file that
/// contains a namespace which it is looking for.
///
/// Note that the content is swapped rather than copied: after the call
/// \p dirs holds the previous load paths.
void setLoadPaths(std::vector<std::string> &dirs) { loadPaths.swap(dirs); }
/// Return a reference to a `SrcBuffer` with the given ID \p i.
const SrcBuffer &getBufferInfo(unsigned i) const {
assert(isValidBufferID(i));
return buffers[i - 1];
}
/// Return a reference to a `SrcBuffer` with the given namespace name \p ns.
/// The namespace has to be known to the source manager already.
const SrcBuffer &getBufferInfo(llvm::StringRef ns) const {
auto bufferId = nsTable.lookup(ns);
if (bufferId == 0) {
// No such namespace
llvm_unreachable("couldn't find the src buffer for a namespace. It "
"should never happen.");
}
return buffers[bufferId - 1];
}
/// Return a pointer to the internal `llvm::MemoryBuffer` of the `SrcBuffer`
/// with the given ID \p i.
const llvm::MemoryBuffer *getMemoryBuffer(unsigned i) const {
assert(isValidBufferID(i));
return buffers[i - 1].buffer.get();
}
/// Return the number of buffers held by the source manager.
unsigned getNumBuffers() const { return buffers.size(); }
/// Add a new source buffer to this source manager. This takes ownership of
/// the memory buffer. The returned value is the ID of the new buffer.
unsigned AddNewSourceBuffer(std::unique_ptr<llvm::MemoryBuffer> f,
LocationRange includeLoc);
/// Look up the file containing the definition of the namespace with the
/// given name \p name. In case that the file can't be found, it returns an
/// error. Otherwise it will use the parser to read the file and create an AST
/// from it. Then it creates a namespace, sets its AST to the AST that we just
/// read from the file and returns the namespace.
///
/// \p importLoc is a location in the source code where the given namespace is
/// imported.
MaybeNS readNamespace(SereneContext &ctx, std::string name,
LocationRange importLoc);
};
}; // namespace serene
#endif

86
serene/src/types.h Normal file
View File

@ -0,0 +1,86 @@
/* -*- C -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TYPES_H
#define TYPES_H
#include "serene/config.h"
#include <cstdint>
/* Descriptor of a type: its `TypeID` tag (declared in serene/config.h) and
 * a name. */
typedef struct {
const TypeID id;
const char *name;
} Type;
/* A value paired with the descriptor of its type. `data` is an untyped
 * pointer to the payload. */
typedef struct {
const Type type;
const void *data;
} Object;
/* Descriptors of the built-in types. Being `static` in a header, every
 * translation unit that includes this file gets its own copy.
 *
 * NOTE(review): the unscoped enumerator names (TYPE, NIL, ...) only resolve
 * when this header is compiled as C, since config.h declares
 * `enum class TypeID` for C++, yet this header includes <cstdint>, which is
 * a C++ header -- confirm the intended language. */
static const Type type = {.id = TYPE, .name = "type"};
static const Type nil_type = {.id = NIL, .name = "nil"};
static const Type function_type = {.id = FN, .name = "function"};
static const Type protocol_type = {.id = PROTOCOL, .name = "protocol"};
static const Type int_type = {.id = INT, .name = "int"};
static const Type list_type = {.id = LIST, .name = "list"};
/* Descriptor of a function type: its own type tag, the argument types and
 * the return type.
 * NOTE(review): no length field accompanies `args`; how its end is marked
 * is not visible here -- confirm. */
typedef struct {
const Type type;
const Type **args;
const Type *returnType;
} FunctionType;
/* Descriptor of a protocol: its own type tag, a name and the function types
 * that belong to it. */
typedef struct {
const Type type;
const char *name;
const FunctionType **functions;
} ProtocolType;
/* Descriptor of a pair type: its own type tag plus the types of the first
 * and the second element. */
typedef struct {
const Type type;
const Type first;
const Type second;
} PairType;
/* A pair value: its type descriptor and untyped pointers to its two
 * elements. */
typedef struct {
const PairType type;
void *first;
void *second;
} Pair;
/* A list value: a pointer to its head pair and its length. */
typedef struct {
const Pair *head;
const unsigned int len;
} List;
/* A symbol value, identified by its name. */
typedef struct {
const char *name;
} Symbol;
/* A string value: a pointer to the characters plus an explicit length. */
typedef struct {
const char *data;
const unsigned int len;
} String;
/* A number value stored as a `long`. */
typedef struct {
const long data;
} Number;
#endif

138
serene/src/utils.h Normal file
View File

@ -0,0 +1,138 @@
/* -*- C++ -*-
* Serene Programming Language
*
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef UTILS_H
#define UTILS_H
#include <llvm/Support/Error.h>
#include <variant>
// Sometimes we need this to make both analyzer happy
// and the fn signature right.
#define UNUSED(x) (void)(x)
// We use this value with llvm::SmallString<MAX_PATH_SLOTS>
#define MAX_PATH_SLOTS 256
// C++17 required. We can't go back to 14 any more :))
namespace serene {
/// A similar type to Rust's Result data structure. It either holds a value of
/// type `T` successfully or holds a value of type `E` errorfully. It is
/// designed to be used in situations in which the return value of a function
/// might contain some errors. The official way to use this type is to use the
/// factory functions `success` and `error`. For example:
///
/// \code
/// auto successfulResult = Result<int>::success(3);
/// auto notOkResult = Result<int>::error(SomeLLVMError());
/// \endcode
///
/// In order to check for a value being errorful or successful check out the
/// `ok` method or simply use the value as a condition.
///
/// A `Result<T>` can also be built directly from a value of type `T`, but
/// the constructor is `explicit`, so the conversion has to be spelled out:
///
/// \code
/// Result<int> fn() { return Result<int>(2); }
/// \endcode
///
/// The success and the error alternatives are told apart by their position
/// in the underlying variant, never by their type, so `T` and `E` are allowed
/// to be the same type.
template <typename T, typename E = llvm::Error>
class Result {
  // The actual data container. Index 0 is the success case and index 1 the
  // error case.
  std::variant<T, E> contents;

  /// The main constructor which we made private to avoid ambiguousness in
  /// input type. `success` and `error` call this ctor.
  template <typename InPlace, typename Content>
  Result(InPlace i, Content &&c) : contents(i, std::forward<Content>(c)) {}

public:
  /// Create a successful result holding a copy of `v`.
  explicit constexpr Result(const T &v)
      : Result(std::in_place_index_t<0>(), v) {}

  /// Return a pointer to the success case value of the result. It is
  /// important to check for the success case before calling this function.
  constexpr const T *getPointer() const { return &getValue(); }

  /// Return a pointer to the success case value of the result. It is
  /// important to check for the success case before calling this function.
  T *getPointer() { return &getValue(); }

  /// Return a pointer to the success case value of the result. It is
  /// important to check for the success case before calling this function.
  T *operator->() { return getPointer(); }

  /// Return a pointer to the success case value of the result. It is
  /// important to check for the success case before calling this function.
  constexpr const T *operator->() const { return getPointer(); }

  /// Dereference the success case and returns the value. It is
  /// important to check for the success case before calling this function.
  constexpr const T &operator*() const & { return getValue(); }

  /// Dereference the success case and returns the value. It is
  /// important to check for the success case before calling this function.
  T &operator*() & { return getValue(); }

  /// Create a successful result with the given value of type `T`.
  static Result success(T v) {
    return Result(std::in_place_index_t<0>(), std::move(v));
  }

  /// Create an errorful result with the given value of type `E` (default
  /// `llvm::Error`).
  static Result error(E e) {
    return Result(std::in_place_index_t<1>(), std::move(e));
  }

  // The accessors below use `std::get`, which reports a
  // `std::bad_variant_access` if `contents` doesn't hold what is asked for.

  /// Move the value out if it's successful, otherwise `std::get` fails
  T &&getValue() && { return std::move(std::get<0>(contents)); }

  /// Move the error out if it's errorful, otherwise `std::get` fails
  E &&getError() && { return std::move(std::get<1>(contents)); }

  /// Return the value if it's successful, otherwise `std::get` fails
  T &getValue() & { return std::get<0>(contents); }

  /// Return the error value if it's errorful, otherwise `std::get` fails
  E &getError() & { return std::get<1>(contents); }

  /// Return the value if it's successful, otherwise `std::get` fails
  const T &getValue() const & { return std::get<0>(contents); }

  /// Return the error value if it's errorful, otherwise `std::get` fails
  const E &getError() const & { return std::get<1>(contents); }

  /// Return a boolean value indicating whether the value is successful
  /// or errorful.
  ///
  /// The check is made on the index of the active alternative rather than
  /// with `std::holds_alternative<T>`, which is ill-formed when `T` and `E`
  /// are the same type.
  bool ok() const { return contents.index() == 0; }

  /// Same as `ok`.
  operator bool() const { return ok(); }
};
/// Build the fully qualified name of a symbol, `<ns>/<sym>`, and store it in
/// \p result.
inline void makeFQSymbolName(const llvm::StringRef &ns,
                             const llvm::StringRef &sym, std::string &result) {
  // Compose into a temporary first: `ns` or `sym` may refer to the storage
  // of `result`.
  std::string qualified = ns.str();
  qualified += "/";
  qualified.append(sym.data(), sym.size());

  result = std::move(qualified);
}
} // namespace serene
#endif