Move over namespace, ast, and the reader
This commit is contained in:
parent
47c52d0488
commit
b1cca14433
|
@ -51,6 +51,8 @@ option(LLVM_USE_PERF "If the target LLVM build is built with LLVM_USE_PERF" OFF)
|
|||
# Only do these if this is the main project, and not if it is included through add_subdirectory
|
||||
if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
|
||||
## Settings =======================
|
||||
|
||||
set(C_STANDARD 17)
|
||||
# specify the C++ standard
|
||||
if (CPP_20_SUPPORT)
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
|
|
|
@ -24,4 +24,31 @@
|
|||
// Should we build the support for MLIR CL OPTIONS?
|
||||
#cmakedefine SERENE_WITH_MLIR_CL_OPTION
|
||||
|
||||
// TypeID enumerates the kinds of values/AST nodes known to Serene.
// The declaration is written so that the same header works from both C and
// C++: under C++ it is a scoped `enum class TypeID`, under C it is an
// anonymous enum that is typedef'd to `TypeID`.
// NIL is pinned to 0; the remaining enumerators follow sequentially.
#ifdef __cplusplus
|
||||
enum class TypeID {
|
||||
#else
|
||||
typedef enum {
|
||||
#endif
|
||||
NIL = 0,
|
||||
SYMBOL,
|
||||
TYPE,
|
||||
FN,
|
||||
NUMBER,
|
||||
INT,
|
||||
CSTRING,
|
||||
STRING,
|
||||
KEYWORD,
|
||||
NAMESPACE,
|
||||
LIST,
|
||||
MAP,
|
||||
VECTOR,
|
||||
STRUCT,
|
||||
PROTOCOL,
|
||||
Error,
|
||||
}
|
||||
#ifndef __cplusplus
|
||||
TypeID
|
||||
#endif
|
||||
;
|
||||
|
||||
#endif
|
||||
|
|
|
@ -19,4 +19,8 @@ target_sources(serene PRIVATE
|
|||
|
||||
commands/commands.cpp
|
||||
jit/jit.cpp
|
||||
|
||||
ast.cpp
|
||||
namespace.cpp
|
||||
|
||||
)
|
||||
|
|
|
@ -0,0 +1,167 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "ast.h"
|
||||
|
||||
#include <llvm/Support/FormatVariadic.h>
|
||||
|
||||
namespace serene::ast {
|
||||
|
||||
// ============================================================================
|
||||
// Symbol
|
||||
// ============================================================================
|
||||
/// Builds a symbol from a possibly namespace-qualified `name`.
///
/// A name of the form `ns/sym` is split at the first '/': the text before it
/// becomes the namespace name and the text after it becomes the symbol name.
/// A name without a '/' is placed in `currentNS`.
///
/// IMPORTANT NOTE: `name` and `currentNS` must be valid strings that have
/// already been validated by the caller.
Symbol::Symbol(const LocationRange &loc, llvm::StringRef name,
               llvm::StringRef currentNS)
    : Expression(loc) {
  const auto slashPos = name.find('/');

  if (slashPos != llvm::StringRef::npos) {
    // Qualified symbol: split around the first '/'.
    nsName     = name.substr(0, slashPos).str();
    this->name = name.substr(slashPos + 1, name.size()).str();
    return;
  }

  // Unqualified symbol: it belongs to the current namespace.
  nsName     = currentNS.str();
  this->name = name.str();
}
|
||||
|
||||
/// Creates a symbol with the same location, name and namespace name as `s`.
Symbol::Symbol(Symbol &s)
    : Expression(s.location), name(s.name), nsName(s.nsName) {}
|
||||
|
||||
TypeID Symbol::getType() const { return TypeID::SYMBOL; };
|
||||
|
||||
std::string Symbol::toString() const {
|
||||
return llvm::formatv("<Symbol {0}/{1}>", nsName, name);
|
||||
}
|
||||
|
||||
bool Symbol::classof(const Expression *e) {
|
||||
return e->getType() == TypeID::SYMBOL;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Number
|
||||
// ============================================================================
|
||||
Number::Number(const LocationRange &loc, const long &num)
|
||||
: Expression(loc), value(num), isNeg(num < 0), isFloat(false){};
|
||||
|
||||
Number::Number(const LocationRange &loc, const double &num)
|
||||
: Expression(loc), value(num), isNeg(num < 0), isFloat(true){};
|
||||
|
||||
/// Creates a number with the same location, value and flags as `n`.
///
/// All three data members are copied. Copying only `value` would leave
/// `isNeg` and `isFloat` uninitialized, and `toString` on the copy would then
/// read an indeterminate flag.
Number::Number(Number &n)
    : Expression(n.location), value(n.value), isNeg(n.isNeg),
      isFloat(n.isFloat) {}
|
||||
|
||||
TypeID Number::getType() const { return TypeID::NUMBER; };
|
||||
|
||||
std::string Number::toString() const {
|
||||
if (isFloat) {
|
||||
return llvm::formatv("<Number {0}{1}>", std::get<double>(value));
|
||||
}
|
||||
return llvm::formatv("<Number {0}{1}>", std::get<long>(value));
|
||||
}
|
||||
|
||||
bool Number::classof(const Expression *e) {
|
||||
return e->getType() == TypeID::NUMBER;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// List
|
||||
// ============================================================================
|
||||
/// Creates a list that takes over the nodes stored in `v`.
/// `v` is left empty once the constructor returns.
List::List(const LocationRange &loc, Ast &v) : Expression(loc) {
  elements = std::move(v);
  // Make the emptied state of the source explicit.
  v.clear();
}
|
||||
|
||||
TypeID List::getType() const { return TypeID::LIST; };
|
||||
|
||||
std::string List::toString() const {
|
||||
std::string s{this->elements.empty() ? "-" : ""};
|
||||
|
||||
for (const auto &n : this->elements) {
|
||||
s = llvm::formatv("{0}, {1}", s, n->toString());
|
||||
}
|
||||
|
||||
return llvm::formatv("<List {0}>", s);
|
||||
}
|
||||
|
||||
bool List::classof(const Expression *e) {
|
||||
return e->getType() == TypeID::LIST;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// String
|
||||
// ============================================================================
|
||||
String::String(const LocationRange &loc, llvm::StringRef v)
|
||||
: Expression(loc), data(v.str()){};
|
||||
|
||||
String::String(String &s) : Expression(s.location), data(s.data){};
|
||||
|
||||
TypeID String::getType() const { return TypeID::STRING; };
|
||||
|
||||
std::string String::toString() const {
|
||||
const short truncateSize = 10;
|
||||
return llvm::formatv(
|
||||
"<String '{0}'>",
|
||||
data.substr(0, data.size() >= truncateSize ? truncateSize : data.size()));
|
||||
}
|
||||
|
||||
bool String::classof(const Expression *e) {
|
||||
return e->getType() == TypeID::STRING;
|
||||
};
|
||||
// ============================================================================
|
||||
// Keyword
|
||||
// ============================================================================
|
||||
Keyword::Keyword(const LocationRange &loc, llvm::StringRef name)
|
||||
: Expression(loc), name(name.str()){};
|
||||
|
||||
/// Creates a keyword with the same location and name as `s`.
Keyword::Keyword(Keyword &s) : Expression(s.location), name(s.name) {}
|
||||
|
||||
TypeID Keyword::getType() const { return TypeID::KEYWORD; };
|
||||
|
||||
std::string Keyword::toString() const {
|
||||
return llvm::formatv("<Keyword {0}>", name);
|
||||
}
|
||||
|
||||
bool Keyword::classof(const Expression *e) {
|
||||
return e->getType() == TypeID::KEYWORD;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Error
|
||||
// ============================================================================
|
||||
Error::Error(const LocationRange &loc, std::unique_ptr<Keyword> tag,
|
||||
llvm::StringRef msg)
|
||||
: Expression(loc), msg(msg.str()), tag(std::move(tag)){};
|
||||
|
||||
Error::Error(Error &e) : Expression(e.location) {
|
||||
this->msg = e.msg;
|
||||
this->tag = std::move(e.tag);
|
||||
};
|
||||
|
||||
TypeID Error::getType() const { return TypeID::KEYWORD; };
|
||||
|
||||
std::string Error::toString() const {
|
||||
return llvm::formatv("<Error {0}>", msg);
|
||||
}
|
||||
|
||||
bool Error::classof(const Expression *e) {
|
||||
return e->getType() == TypeID::KEYWORD;
|
||||
};
|
||||
|
||||
} // namespace serene::ast
|
|
@ -0,0 +1,254 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef AST_H
|
||||
#define AST_H
|
||||
|
||||
#include "location.h"
|
||||
#include "serene/config.h"
|
||||
|
||||
#include <llvm/Support/Error.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace serene::ast {
|
||||
|
||||
struct Expression;
|
||||
|
||||
using Node = std::unique_ptr<Expression>;
|
||||
using MaybeNode = llvm::Expected<Node>;
|
||||
|
||||
using Ast = std::vector<Node>;
|
||||
using MaybeAst = llvm::Expected<Ast>;
|
||||
|
||||
constexpr static auto EmptyNode = nullptr;
|
||||
|
||||
// ============================================================================
|
||||
// Expression
|
||||
// The abstract class that all the AST nodes derived from. It provides the
|
||||
// common interface for the expressions to implement.
|
||||
// ============================================================================
|
||||
struct Expression {
|
||||
|
||||
/// The location range provide information regarding to where in the input
|
||||
/// string the current expression is used.
|
||||
LocationRange location;
|
||||
|
||||
Expression(const LocationRange &loc) : location(loc){};
|
||||
virtual ~Expression() = default;
|
||||
|
||||
/// Returns the type of the expression. We need this funciton to perform
|
||||
/// dynamic casting of expression object to implementations such as lisp or
|
||||
/// symbol.
|
||||
virtual TypeID getType() const = 0;
|
||||
|
||||
/// The AST representa htion of an expression
|
||||
virtual std::string toString() const = 0;
|
||||
|
||||
/// Analyzes the semantics of current node and return a new node in case
|
||||
/// that we need to semantically rewrite the current node and replace it with
|
||||
/// another node. For example to change from a List containing `(def a b)`
|
||||
/// to a `Def` node that represents defining a new binding.
|
||||
///
|
||||
/// \param state is the analysis state object of the semantic analyzer.
|
||||
// virtual MaybeNode analyze(semantics::AnalysisState &state) = 0;
|
||||
|
||||
/// Genenates the correspondig SLIR of the expressoin and attach it to the
|
||||
/// given module.
|
||||
///
|
||||
/// \param ns The namespace that current expression is in it.
|
||||
/// \param m The target MLIR moduleOp to attach the operations to
|
||||
// virtual void generateIR(serene::Namespace &ns, mlir::ModuleOp &m) = 0;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Symbol
|
||||
// It represents a lisp symbol (don't mix it up with ELF symbols).
|
||||
// ============================================================================
|
||||
struct Symbol : public Expression {
|
||||
std::string name;
|
||||
std::string nsName;
|
||||
|
||||
Symbol(const LocationRange &loc, llvm::StringRef name,
|
||||
llvm::StringRef currentNS);
|
||||
Symbol(Symbol &s);
|
||||
|
||||
TypeID getType() const override;
|
||||
std::string toString() const override;
|
||||
|
||||
~Symbol() = default;
|
||||
|
||||
static bool classof(const Expression *e);
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Number
|
||||
// ============================================================================
|
||||
struct Number : public Expression {
|
||||
// TODO: [ast] Split the number type into their own types
|
||||
std::variant<long, double> value;
|
||||
// /TODO
|
||||
|
||||
bool isNeg;
|
||||
bool isFloat;
|
||||
|
||||
Number(const LocationRange &loc, const long &num);
|
||||
Number(const LocationRange &loc, const unsigned long &num);
|
||||
Number(const LocationRange &loc, const double &num);
|
||||
Number(Number &n);
|
||||
|
||||
TypeID getType() const override;
|
||||
std::string toString() const override;
|
||||
|
||||
~Number() = default;
|
||||
|
||||
static bool classof(const Expression *e);
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// List
|
||||
// ============================================================================
|
||||
struct List : public Expression {
|
||||
Ast elements;
|
||||
|
||||
List(const LocationRange &loc, Ast &v);
|
||||
List(const List &l) = delete;
|
||||
List(List &&l) noexcept = default;
|
||||
|
||||
TypeID getType() const override;
|
||||
std::string toString() const override;
|
||||
|
||||
~List() = default;
|
||||
|
||||
static bool classof(const Expression *e);
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// String
|
||||
// ============================================================================
|
||||
struct String : public Expression {
|
||||
std::string data;
|
||||
|
||||
String(const LocationRange &loc, llvm::StringRef v);
|
||||
String(String &s);
|
||||
|
||||
TypeID getType() const override;
|
||||
std::string toString() const override;
|
||||
|
||||
~String() = default;
|
||||
|
||||
static bool classof(const Expression *e);
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Keyword
|
||||
// ============================================================================
|
||||
struct Keyword : public Expression {
|
||||
std::string name;
|
||||
|
||||
Keyword(const LocationRange &loc, llvm::StringRef name);
|
||||
Keyword(Keyword &s);
|
||||
|
||||
TypeID getType() const override;
|
||||
std::string toString() const override;
|
||||
|
||||
~Keyword() = default;
|
||||
|
||||
static bool classof(const Expression *e);
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Error
|
||||
// One way of representing errors is to just treat them as another type of node
|
||||
// in the AST and the parser can generate them in case of any error or semantic
|
||||
// analyzer can do the same. At the time of processing the AST by the JIT
|
||||
// or even anytime earlier we can just stop the execution and deal with the
|
||||
// issue
|
||||
// ============================================================================
|
||||
struct Error : public Expression {
|
||||
std::string msg;
|
||||
std::unique_ptr<Keyword> tag;
|
||||
|
||||
Error(const LocationRange &loc, std::unique_ptr<Keyword> tag,
|
||||
llvm::StringRef msg);
|
||||
Error(Error &e);
|
||||
|
||||
TypeID getType() const override;
|
||||
std::string toString() const override;
|
||||
|
||||
~Error() = default;
|
||||
|
||||
static bool classof(const Expression *e);
|
||||
};
|
||||
|
||||
/// Create a new `node` of type `T` and forwards any given parameter
|
||||
/// to the constructor of type `T`. This is the **official way** to create
|
||||
/// a new `Expression`. Here is an example:
|
||||
/// \code
|
||||
/// auto list = make<List>();
|
||||
/// \endcode
|
||||
///
|
||||
/// \param[args] Any argument with any type passed to this function will be
|
||||
/// passed to the constructor of type T.
|
||||
/// \return A unique pointer to an Expression
|
||||
template <typename T, typename... Args>
|
||||
Node make(Args &&...args) {
|
||||
return std::make_unique<T>(std::forward<Args>(args)...);
|
||||
};
|
||||
/// Creates a new object of type `T`, forwarding every given argument to the
/// constructor of `T`, and returns it typed as `T` rather than as a generic
/// expression. Here is an example:
/// \code
/// auto sym = makeAndCast<Symbol>(loc, "a", "user");
/// \endcode
///
/// The object is created with `std::make_shared`, so the value and its
/// reference-count block come from a single allocation.
///
/// \param[args] Any argument with any type passed to this function will be
///              passed to the constructor of type T.
/// \return A shared pointer to a value of type T.
template <typename T, typename... Args>
std::shared_ptr<T> makeAndCast(Args &&...args) {
  return std::make_shared<T>(std::forward<Args>(args)...);
}
|
||||
|
||||
/// The helper function to create a new `Node` and returnsit. It should be useds
|
||||
/// where every we want to return a `MaybeNode` successfully.
|
||||
template <typename T, typename... Args>
|
||||
MaybeNode makeSuccessfulNode(Args &&...args) {
|
||||
return make<T>(std::forward<Args>(args)...);
|
||||
};
|
||||
|
||||
/// The hlper function to creates an Error (`llvm::Error`) by passing all
|
||||
/// the given arguments to the constructor of the template param `E`.
|
||||
template <typename E, typename T = Node, typename... Args>
|
||||
llvm::Expected<T> makeErrorful(Args &&...args) {
|
||||
return llvm::make_error<E>(std::forward<Args>(args)...);
|
||||
};
|
||||
|
||||
/// The hlper function to creates an Error (`llvm::Error`) by passing all
|
||||
/// the given arguments to the constructor of the template param `E`.
|
||||
template <typename E, typename... Args>
|
||||
MaybeNode makeErrorNode(Args &&...args) {
|
||||
return makeErrorful<E, Node>(std::forward<Args>(args)...);
|
||||
};
|
||||
|
||||
/// Converts the given AST to string and prints it out
|
||||
// NOTE(review): no definition of `dump` is visible in ast.cpp within this
// change — confirm it is defined elsewhere before relying on it.
void dump(Ast &);
|
||||
|
||||
} // namespace serene::ast
|
||||
|
||||
#endif
|
|
@ -0,0 +1,80 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef ENVIRONMENT_H
|
||||
#define ENVIRONMENT_H
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#include <llvm/ADT/StringMap.h>
|
||||
#include <mlir/Support/LogicalResult.h>
|
||||
|
||||
namespace serene {
|
||||
|
||||
/// This class represents a classic lisp environment (or scope) that holds the
|
||||
/// bindings from type `K` to type `V`. For example an environment of symbols
|
||||
/// to expressions would be `Environment<Symbol, Node>`
|
||||
template <typename V>
|
||||
class Environment {
|
||||
|
||||
Environment<V> *parent;
|
||||
|
||||
using StorageType = llvm::StringMap<V>;
|
||||
// The actual bindings storage
|
||||
StorageType pairs;
|
||||
|
||||
public:
|
||||
Environment() : parent(nullptr) {}
|
||||
explicit Environment(Environment *parent) : parent(parent){};
|
||||
|
||||
/// Look up the given `key` in the environment and return it.
|
||||
std::optional<V> lookup(llvm::StringRef key) {
|
||||
if (auto value = pairs.lookup(key)) {
|
||||
return value;
|
||||
}
|
||||
|
||||
if (parent) {
|
||||
return parent->lookup(key);
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
/// Insert the given `key` with the given `value` into the storage. This
|
||||
/// operation will shadow an aleady exist `key` in the parent environment
|
||||
mlir::LogicalResult insert_symbol(llvm::StringRef key, V value) {
|
||||
auto result = pairs.insert_or_assign(key, value);
|
||||
UNUSED(result);
|
||||
return mlir::success();
|
||||
};
|
||||
|
||||
inline typename StorageType::iterator begin() { return pairs.begin(); }
|
||||
|
||||
inline typename StorageType::iterator end() { return pairs.end(); }
|
||||
|
||||
inline typename StorageType::const_iterator begin() const {
|
||||
return pairs.begin();
|
||||
}
|
||||
inline typename StorageType::const_iterator end() const {
|
||||
return pairs.end();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace serene
|
||||
|
||||
#endif
|
|
@ -119,6 +119,15 @@ class JIT {
|
|||
|
||||
llvm::Error createCurrentProcessJD();
|
||||
|
||||
// Anonymous function counter. We need to assign a unique name to each
|
||||
// anonymous function and we use this counter to generate those names
|
||||
std::atomic<uint> fn_counter = 0;
|
||||
|
||||
// Since indexing namespaces by the name would be inefficient, We use
|
||||
// unsigned integer and assign a number to all the namespaces at the
|
||||
// creation time. Namespace IDs have to be unique.
|
||||
std::atomic<uint> ns_counter = 0;
|
||||
|
||||
public:
|
||||
JIT(llvm::orc::JITTargetMachineBuilder &&jtmb, std::unique_ptr<Options> opts);
|
||||
static MaybeJIT make(llvm::orc::JITTargetMachineBuilder &&jtmb,
|
||||
|
@ -151,6 +160,8 @@ public:
|
|||
void setLoadPaths(std::vector<const char *> &dirs) { loadPaths.swap(dirs); };
|
||||
/// Return the load paths for namespaces
|
||||
llvm::ArrayRef<const char *> getLoadPaths() { return loadPaths; };
|
||||
|
||||
const Options &getOptions() const { return *options; };
|
||||
};
|
||||
|
||||
MaybeJIT makeJIT(std::unique_ptr<Options> opts);
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef LOCATION_H
|
||||
#define LOCATION_H
|
||||
|
||||
#include <mlir/IR/Diagnostics.h>
|
||||
#include <mlir/IR/Location.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
namespace serene {
|
||||
|
||||
/// It represents a location in the input string to the parser via `line`,
|
||||
struct Location {
|
||||
/// Since namespaces are our unit of compilation, we need to have
|
||||
/// a namespace in hand
|
||||
llvm::StringRef ns;
|
||||
|
||||
std::optional<llvm::StringRef> filename = std::nullopt;
|
||||
/// A pointer to the character that this location is pointing to
|
||||
/// it the input buffer
|
||||
const char *c = nullptr;
|
||||
|
||||
/// At this stage we only support 65535 lines of code in each file
|
||||
unsigned short int line = 0;
|
||||
/// At this stage we only support 65535 chars in each line
|
||||
unsigned short int col = 0;
|
||||
|
||||
bool knownLocation = true;
|
||||
|
||||
::std::string toString() const;
|
||||
|
||||
Location() = default;
|
||||
explicit Location(llvm::StringRef ns,
|
||||
std::optional<llvm::StringRef> fname = std::nullopt,
|
||||
const char *c = nullptr, unsigned short int line = 0,
|
||||
unsigned short int col = 0, bool knownLocation = true)
|
||||
: ns(ns), filename(fname), c(c), line(line), col(col),
|
||||
knownLocation(knownLocation){};
|
||||
|
||||
Location clone() const;
|
||||
|
||||
// mlir::Location toMLIRLocation(mlir::MLIRContext &ctx);
|
||||
|
||||
/// Returns an unknown location for the given \p ns.
|
||||
static Location UnknownLocation(llvm::StringRef ns) {
|
||||
return Location(ns, std::nullopt, nullptr, 0, 0, false);
|
||||
}
|
||||
};
|
||||
|
||||
class LocationRange {
|
||||
public:
|
||||
Location start;
|
||||
Location end;
|
||||
|
||||
LocationRange() = default;
|
||||
explicit LocationRange(Location _start) : start(_start), end(_start){};
|
||||
LocationRange(Location _start, Location _end) : start(_start), end(_end){};
|
||||
// LocationRange(const LocationRange &);
|
||||
|
||||
bool isKnownLocation() const { return start.knownLocation; };
|
||||
|
||||
static LocationRange UnknownLocation(llvm::StringRef ns) {
|
||||
return LocationRange(Location::UnknownLocation(ns));
|
||||
}
|
||||
};
|
||||
|
||||
void incLocation(Location &, const char *);
|
||||
void decLocation(Location &, const char *);
|
||||
|
||||
} // namespace serene
|
||||
#endif
|
|
@ -0,0 +1,223 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include "namespace.h"
|
||||
|
||||
#include "jit/jit.h"
|
||||
|
||||
#include <llvm/ADT/StringRef.h>
|
||||
#include <llvm/Support/FormatVariadic.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <mlir/IR/Builders.h>
|
||||
#include <mlir/IR/BuiltinOps.h>
|
||||
#include <mlir/IR/Verifier.h>
|
||||
#include <mlir/Support/LogicalResult.h>
|
||||
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
using namespace llvm;
|
||||
|
||||
namespace serene {
|
||||
|
||||
/// Creates a namespace called `ns_name` that is bound to the given JIT
/// engine. When a file name is supplied the namespace keeps its own copy of
/// it. The root environment is created as part of construction.
Namespace::Namespace(jit::JIT &engine, llvm::StringRef ns_name,
                     std::optional<llvm::StringRef> filename)
    : engine(engine), name(ns_name) {
  if (filename) {
    this->filename.emplace(filename->str());
  }

  // Create the root environment
  createEnv(nullptr);
}
|
||||
|
||||
/// Creates a new semantic environment whose parent is `parent`, stores it in
/// `environments` (which owns it) and returns a reference to it.
SemanticEnv &Namespace::createEnv(SemanticEnv *parent) {
  environments.push_back(std::make_unique<SemanticEnv>(parent));
  return *environments.back();
}
|
||||
|
||||
/// Returns the root environment, i.e. the first entry of `environments`.
/// Asserts that at least one environment has been created.
SemanticEnv &Namespace::getRootEnv() {
  assert(!environments.empty() && "Root env is not created!");

  auto &rootEnv = *environments.front();
  return rootEnv;
}
|
||||
|
||||
// mlir::LogicalResult Namespace::define(std::string &name, ast::Node &node) {
|
||||
// auto &rootEnv = getRootEnv();
|
||||
|
||||
// if (failed(rootEnv.insert_symbol(name, node))) {
|
||||
// return mlir::failure();
|
||||
// }
|
||||
|
||||
// symbolList.push_back(name);
|
||||
// return mlir::success();
|
||||
// }
|
||||
|
||||
/// Gives mutable access to the AST accumulated in this namespace.
ast::Ast &Namespace::getTree() {
  return tree;
}
|
||||
|
||||
/// Appends the nodes of `ast` to the tree of this namespace.
///
/// The nodes are moved out of `ast`, which is left empty. At the moment the
/// raw nodes are appended whatever the target compilation phase is; the
/// semantic analysis step is still commented out below.
///
/// \return Always `llvm::Error::success()`.
llvm::Error Namespace::ExpandTree(ast::Ast &ast) {
  // Moves every node of `ast` to the end of `tree` and empties `ast`.
  const auto adoptRawNodes = [this, &ast]() {
    tree.insert(tree.end(), std::make_move_iterator(ast.begin()),
                std::make_move_iterator(ast.end()));
    ast.clear();
  };

  // If the target phase is just parsing we don't want
  // to run the semantic analyzer or anything beyond parser
  const bool parseOnly =
      engine.getOptions().compilationPhase == CompilationPhase::Parse;

  if (parseOnly) {
    // we just want the raw AST
    adoptRawNodes();
    return llvm::Error::success();
  }

  // just for now
  adoptRawNodes();

  // auto &rootEnv = getRootEnv();

  // auto state = semantics::makeAnalysisState(*this, rootEnv);
  // // Run the semantic analyzer on the ast and then if everything
  // // is ok add the form to the tree and forms
  // auto maybeForm = semantics::analyze(*state, ast);

  // if (!maybeForm) {
  //   return maybeForm.takeError();
  // }

  // auto semanticAst = std::move(*maybeForm);
  // this->tree.insert(this->tree.end(), semanticAst.begin(),
  //                   semanticAst.end());

  return llvm::Error::success();
}
|
||||
|
||||
// MaybeModuleOp Namespace::generate(unsigned offset) {
|
||||
// // The reason why we return an optional value instead of Errors
|
||||
// // is the way MLIR's diagnostic engine works. Passes may use
|
||||
// // the `emit` function of operations to report errors to the
|
||||
// // diagnostic engine. So we can't return any error diractly.
|
||||
|
||||
// mlir::OpBuilder builder(&ctx.mlirContext);
|
||||
|
||||
// // TODO: Fix the unknown location by pointing to the `ns` form
|
||||
// auto module = mlir::ModuleOp::create(builder.getUnknownLoc(),
|
||||
// std::optional<llvm::StringRef>(name));
|
||||
|
||||
// auto treeSize = getTree().size();
|
||||
|
||||
// // Walk the AST and call the `generateIR` function of each node.
|
||||
// // Since nodes will have access to the a reference of the
|
||||
// // namespace they can use the builder and keep adding more
|
||||
// // operations to the module via the builder
|
||||
// for (unsigned i = offset; i < treeSize; ++i) {
|
||||
// auto &node = getTree()[i];
|
||||
// node->generateIR(*this, module);
|
||||
// }
|
||||
|
||||
// if (mlir::failed(mlir::verify(module))) {
|
||||
// module.emitError("Can't verify the module");
|
||||
// module.erase();
|
||||
// return llvm::None;
|
||||
// }
|
||||
|
||||
// if (mlir::failed(runPasses(module))) {
|
||||
// // TODO: Report a proper error
|
||||
// module.emitError("Failure in passes!");
|
||||
// module.erase();
|
||||
// return llvm::None;
|
||||
// }
|
||||
|
||||
// return MaybeModuleOp(module);
|
||||
// }
|
||||
|
||||
// mlir::LogicalResult Namespace::runPasses(mlir::ModuleOp &m) {
|
||||
// return ctx.pm.run(m);
|
||||
// };
|
||||
|
||||
// void Namespace::dump() {
|
||||
// llvm::outs() << "\nMLIR: \n";
|
||||
// auto maybeModuleOp = generate();
|
||||
|
||||
// if (!maybeModuleOp) {
|
||||
|
||||
// llvm::errs() << "Failed to generate the IR.\n";
|
||||
// return;
|
||||
// }
|
||||
|
||||
// mlir::OpPrintingFlags flags;
|
||||
// flags.enableDebugInfo();
|
||||
|
||||
// maybeModuleOp.getValue()->print(llvm::outs(), flags);
|
||||
// };
|
||||
|
||||
// MaybeModule Namespace::compileToLLVM() {
|
||||
// // The reason why we return an optional value instead of Errors
|
||||
// // is the way MLIR's diagnostic engine works. Passes may use
|
||||
// // the `emit` function of operations to report errors to the
|
||||
// // diagnostic engine. So we can't return any error diractly.
|
||||
|
||||
// auto maybeModule = generate();
|
||||
|
||||
// if (!maybeModule) {
|
||||
// NAMESPACE_LOG("IR generation failed for '" << name << "'");
|
||||
// return llvm::None;
|
||||
// }
|
||||
|
||||
// if (ctx.getTargetPhase() >= CompilationPhase::IR) {
|
||||
// mlir::ModuleOp module = maybeModule.getValue().get();
|
||||
// return ::serene::slir::compileToLLVMIR(ctx, module);
|
||||
// }
|
||||
|
||||
// return llvm::None;
|
||||
// };
|
||||
|
||||
// MaybeModule Namespace::compileToLLVMFromOffset(unsigned offset) {
|
||||
// // The reason why we return an optional value instead of Errors
|
||||
// // is the way MLIR's diagnostic engine works. Passes may use
|
||||
// // the `emit` function of operations to report errors to the
|
||||
// // diagnostic engine. So we can't return any error diractly.
|
||||
|
||||
// auto maybeModule = generate(offset);
|
||||
|
||||
// if (!maybeModule) {
|
||||
// NAMESPACE_LOG("IR generation failed for '" << name << "'");
|
||||
// return llvm::None;
|
||||
// }
|
||||
|
||||
// if (ctx.getTargetPhase() >= CompilationPhase::IR) {
|
||||
// mlir::ModuleOp module = maybeModule.getValue().get();
|
||||
// return ::serene::slir::compileToLLVMIR(ctx, module);
|
||||
// }
|
||||
|
||||
// return llvm::None;
|
||||
// };
|
||||
|
||||
/// Factory for namespaces: constructs a heap allocated `Namespace` from the
/// given JIT \p engine, \p name and optional \p filename and hands ownership
/// of it to the caller through an `NSPtr`.
NSPtr Namespace::make(jit::JIT &engine, llvm::StringRef name,
                      std::optional<llvm::StringRef> filename) {
  NSPtr freshNs = std::make_unique<Namespace>(engine, name, filename);
  return freshNs;
}
|
||||
|
||||
/// Destroys the namespace. At the moment the only explicit work done here is
/// emitting a debug log entry carrying the namespace name; the clean up steps
/// listed below are still pending.
Namespace::~Namespace() {
  // TODO: Clean up anything related to this namespace in the context
  // TODO: Remove anything related to this namespace in the JIT

  NAMESPACE_LOG("Destructing NS: " << name);
}
|
||||
|
||||
} // namespace serene
|
|
@ -0,0 +1,140 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Commentary:
|
||||
* Rules of a namespace:
|
||||
* - A namespace has to have a name and it has to own it.
|
||||
* - A namespace may or may not be associated with a file
|
||||
* - The internal AST of a namespace is an evergrowing tree which may expand at
|
||||
* any given time. For example via iteration of a REPL
|
||||
* - `environments` vector is the owner of all the semantic envs
|
||||
* - The first env in the `environments` is the root env.
|
||||
*
|
||||
* How to create a namespace ?
|
||||
* The official way to create a namespace object is to use the `SereneContext`
|
||||
* object and call `readNamespace`, `importNamespace` or `makeNamespace`.
|
||||
*/
|
||||
|
||||
// TODO: Add a mechanism to figure out whether a namespace has changed or not
|
||||
// either on memory or disk
|
||||
|
||||
#ifndef NAMESPACE_H
|
||||
#define NAMESPACE_H
|
||||
|
||||
#include "ast.h"
|
||||
#include "environment.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include <llvm/ADT/SmallString.h>
|
||||
#include <llvm/ADT/SmallVector.h>
|
||||
#include <llvm/ADT/StringRef.h>
|
||||
#include <llvm/ADT/Twine.h>
|
||||
#include <llvm/ExecutionEngine/Orc/Core.h>
|
||||
#include <llvm/ExecutionEngine/Orc/ThreadSafeModule.h>
|
||||
#include <llvm/IR/Module.h>
|
||||
#include <llvm/Support/Error.h>
|
||||
#include <mlir/IR/Builders.h>
|
||||
#include <mlir/IR/BuiltinOps.h>
|
||||
#include <mlir/IR/OwningOpRef.h>
|
||||
#include <mlir/IR/Value.h>
|
||||
#include <mlir/Support/LogicalResult.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
/// Logging helper for namespace related code. It forwards the streamed
/// arguments to `llvm::dbgs()` through LLVM's `DEBUG_WITH_TYPE` using the
/// "NAMESPACE" debug type and appends a newline.
/// Note that the expansion already ends with a `;`.
#define NAMESPACE_LOG(...)                                                     \
  DEBUG_WITH_TYPE("NAMESPACE", llvm::dbgs() << __VA_ARGS__ << "\n");
|
||||
|
||||
namespace serene {
|
||||
namespace jit {
|
||||
class JIT;
|
||||
} // namespace jit
|
||||
|
||||
class Namespace;
|
||||
|
||||
using NSPtr = std::unique_ptr<Namespace>;
|
||||
using MaybeNS = llvm::Expected<NSPtr>;
|
||||
using SemanticEnv = Environment<ast::Node>;
|
||||
using SemanticEnvPtr = std::unique_ptr<SemanticEnv>;
|
||||
using SemanticEnvironments = std::vector<SemanticEnvPtr>;
|
||||
|
||||
/// Serene's namespaces are the unit of compilation. Any code that needs to be
|
||||
/// compiled has to be in a namespace. The official way to create a new
|
||||
/// namespace is to use the `readNamespace`, `importNamespace` and
|
||||
/// `makeNamespace` member functions of `SereneContext`.
|
||||
class Namespace {
|
||||
jit::JIT &engine;
|
||||
/// The content of the namespace. It should alway hold a semantically
|
||||
/// correct AST. It means thet the AST that we want to store here has
|
||||
/// to pass the semantic analyzer checks.
|
||||
ast::Ast tree;
|
||||
|
||||
SemanticEnvironments environments;
|
||||
|
||||
std::vector<llvm::StringRef> symbolList;
|
||||
|
||||
public:
|
||||
std::string name;
|
||||
std::optional<std::string> filename;
|
||||
|
||||
/// Create a naw namespace with the given `name` and optional `filename` and
|
||||
/// return a unique pointer to it in the given Serene context.
|
||||
static NSPtr make(jit::JIT &engine, llvm::StringRef name,
|
||||
std::optional<llvm::StringRef> filename);
|
||||
|
||||
Namespace(jit::JIT &engine, llvm::StringRef ns_name,
|
||||
std::optional<llvm::StringRef> filename);
|
||||
|
||||
/// Create a new environment with the give \p parent as the parent,
|
||||
/// push the environment to the internal environment storage and
|
||||
/// return a reference to it. The namespace itself is the owner of
|
||||
/// environments.
|
||||
SemanticEnv &createEnv(SemanticEnv *parent);
|
||||
|
||||
/// Return a referenece to the top level (root) environment of ns.
|
||||
SemanticEnv &getRootEnv();
|
||||
|
||||
/// Define a new binding in the root environment with the given \p name
|
||||
/// and the given \p node. Defining a new binding with a name that
|
||||
/// already exists in legal and will overwrite the previous binding and
|
||||
/// the given name will point to a new value from now on.
|
||||
mlir::LogicalResult define(std::string &name, ast::Node &node);
|
||||
|
||||
/// Add the given \p ast to the namespace and return any possible error.
|
||||
/// The given \p ast will be added to a vector of ASTs by expanding
|
||||
/// the tree vector to contain \p ast.
|
||||
///
|
||||
/// This function runs the semantic analyzer on the \p ast as well.
|
||||
llvm::Error ExpandTree(ast::Ast &ast);
|
||||
|
||||
ast::Ast &getTree();
|
||||
|
||||
const std::vector<llvm::StringRef> &getSymList() { return symbolList; };
|
||||
|
||||
/// Dumps the namespace with respect to the compilation phase
|
||||
// void dump();
|
||||
|
||||
~Namespace();
|
||||
};
|
||||
|
||||
} // namespace serene
|
||||
|
||||
#endif
|
|
@ -65,7 +65,7 @@ struct Options {
|
|||
// appropriate code for the host. If the same function has to be part
|
||||
// of the runtime, then we use `targetTriple` again to generate the code
|
||||
// for the target platform. So, we might end up with two version of the
|
||||
// same function
|
||||
// same function.
|
||||
const llvm::Triple hostTriple;
|
||||
|
||||
CompilationPhase compilationPhase = CompilationPhase::NoOptimization;
|
||||
|
|
|
@ -0,0 +1,431 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "reader.h"
|
||||
|
||||
// #include "serene/errors.h"
|
||||
// #include "serene/exprs/expression.h"
|
||||
// #include "serene/exprs/list.h"
|
||||
// #include "serene/exprs/number.h"
|
||||
// #include "serene/exprs/symbol.h"
|
||||
// #include "serene/namespace.h"
|
||||
// #include "serene/utils.h"
|
||||
|
||||
#include <llvm/ADT/StringRef.h>
|
||||
#include <llvm/Support/Error.h>
|
||||
#include <llvm/Support/ErrorHandling.h>
|
||||
#include <llvm/Support/ErrorOr.h>
|
||||
#include <llvm/Support/FormatVariadic.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#include <llvm/Support/SMLoc.h>
|
||||
#include <mlir/IR/Diagnostics.h>
|
||||
#include <mlir/IR/Location.h>
|
||||
#include <mlir/IR/MLIRContext.h>
|
||||
#include <mlir/Support/LogicalResult.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <cctype>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
namespace serene {
|
||||
|
||||
namespace reader {
|
||||
// LocationRange::LocationRange(const LocationRange &loc) {
|
||||
// start = loc.start.clone();
|
||||
// end = loc.end.clone();
|
||||
// }
|
||||
|
||||
/// Return the string represenation of the location.
|
||||
std::string Location::toString() const {
|
||||
return llvm::formatv("{0}:{1}", line, col);
|
||||
};
|
||||
|
||||
/// Create a new `Location` carrying the same field values as this one.
Location Location::clone() const {
  return Location{this->ns,   this->filename, this->c,
                  this->line, this->col,      this->knownLocation};
}
|
||||
|
||||
/// Increase the given location by one and set the line/col value in respect to
|
||||
/// the `newline` in place.
|
||||
/// \param loc The `Location` data
|
||||
/// \param c A pointer to the current char that the location has to point to
|
||||
void incLocation(Location &loc, const char *c) {
|
||||
// TODO: Handle the end of line with respect to the OS.
|
||||
// increase the current position in the buffer with respect to the end
|
||||
// of line.
|
||||
auto newline = *c == '\n';
|
||||
|
||||
if (!newline) {
|
||||
loc.col++;
|
||||
} else {
|
||||
loc.line++;
|
||||
loc.col = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// decrease the given location by one and set the line/col value in respect to
|
||||
/// the `newline` in place.
|
||||
/// \param loc The `Location` data
|
||||
/// \param c A pointer to the current char that the location has to point to
|
||||
void decLocation(Location &loc, const char *c) {
|
||||
// TODO: Handle the end of line with respect to the OS.
|
||||
// increase the current position in the buffer with respect to the end
|
||||
// of line.
|
||||
auto newline = *c == '\n';
|
||||
|
||||
if (newline) {
|
||||
loc.line = loc.line == 0 ? 0 : loc.line - 1;
|
||||
|
||||
// We don't move back the `col` value because we simply don't know it
|
||||
} else {
|
||||
loc.col = loc.col == 0 ? 0 : loc.col - 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a reader over the given \p buffer for the namespace \p ns and the
/// optional \p filename. The reader only keeps views of its inputs.
Reader::Reader(SereneContext &ctx, llvm::StringRef buffer, llvm::StringRef ns,
               std::optional<llvm::StringRef> filename)
    : ctx(ctx), ns(ns), filename(filename), buf(buffer),
      currentLocation(Location(ns, filename)) {
  UNUSED(this->ctx);
  READER_LOG("Setting the first char of the buffer");

  // Lines and columns start from 1.
  currentLocation.line = 1;
  currentLocation.col  = 1;
  currentPos           = 1;

  // The cursor starts one slot before the buffer, so the very first call to
  // `advanceByOne` lands on the first character.
  // NOTE(review): forming a pointer before the beginning of the buffer is
  // technically undefined behaviour; worth revisiting.
  currentChar = buf.begin() - 1;
}
|
||||
|
||||
/// Same as the `llvm::StringRef` based constructor, it just reads the content
/// out of the given memory buffer reference and delegates.
Reader::Reader(SereneContext &ctx, llvm::MemoryBufferRef buffer,
               llvm::StringRef ns, std::optional<llvm::StringRef> filename)
    : Reader(ctx, buffer.getBuffer(), ns, filename) {}
|
||||
|
||||
/// Nothing to release explicitly here, the destructor only leaves a trace in
/// the reader log.
Reader::~Reader() {
  READER_LOG("Destroying the reader");
}
|
||||
|
||||
/// Move the cursor exactly one character forward and keep the position and
/// the location trackers in sync.
///
/// The line counter is bumped lazily: a newline only sets the `readEOL` flag
/// and the location moves to the next line (column 1) when the character
/// after the newline is consumed.
void Reader::advanceByOne() {
  ++currentChar;
  ++currentPos;
  ++currentLocation.col;

  const bool atNewline = (*currentChar == '\n');

  if (atNewline) {
    READER_LOG("Detected end of line");
  }

  // The previous char was a newline, so the char we just moved to starts a
  // new line (whether it is another newline or not).
  if (readEOL) {
    currentLocation.line++;
    currentLocation.col = 1;
  }

  readEOL = atNewline;

  READER_LOG("Moving to Char: " << *currentChar << " at location: "
                                << currentLocation.toString());
}
|
||||
void Reader::advance(bool skipWhitespace) {
|
||||
if (skipWhitespace) {
|
||||
for (;;) {
|
||||
const auto *next = currentChar + 1;
|
||||
|
||||
if (isspace(*next) == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
advanceByOne();
|
||||
}
|
||||
} else {
|
||||
advanceByOne();
|
||||
}
|
||||
};
|
||||
|
||||
const char *Reader::nextChar(bool skipWhitespace, unsigned count) {
|
||||
if (!skipWhitespace) {
|
||||
READER_LOG("Next char: " << *(currentChar + count));
|
||||
return currentChar + count;
|
||||
}
|
||||
|
||||
const auto *c = currentChar + 1;
|
||||
while (isspace(*c) != 0) {
|
||||
c++;
|
||||
};
|
||||
|
||||
READER_LOG("Next char: " << *c);
|
||||
return c;
|
||||
};
|
||||
|
||||
bool Reader::isEndOfBuffer(const char *c) {
|
||||
return *c == '\0' || currentPos > buf.size() || ((const int)*c == EOF);
|
||||
};
|
||||
|
||||
/// Return a clone of the location that the reader is currently at.
Location Reader::getCurrentLocation() {
  return currentLocation.clone();
}
|
||||
|
||||
/// A predicate function indicating whether the given char `c` is a valid
|
||||
/// char for the starting point of a symbol or not.
|
||||
bool Reader::isValidForIdentifier(char c) {
|
||||
switch (c) {
|
||||
case '!':
|
||||
case '$':
|
||||
case '%':
|
||||
case '&':
|
||||
case '*':
|
||||
case '+':
|
||||
case '-':
|
||||
case '.':
|
||||
case '~':
|
||||
case '/':
|
||||
case ':':
|
||||
case '<':
|
||||
case '=':
|
||||
case '>':
|
||||
case '?':
|
||||
case '@':
|
||||
case '^':
|
||||
case '_':
|
||||
return true;
|
||||
}
|
||||
|
||||
return std::isalnum(c) != 0;
|
||||
}
|
||||
|
||||
/// Reads a number,
|
||||
/// \param neg whether to read a negative number or not.
|
||||
exprs::MaybeNode Reader::readNumber(bool neg) {
|
||||
READER_LOG("Reading a number...");
|
||||
std::string number(neg ? "-" : "");
|
||||
bool floatNum = false;
|
||||
bool empty = false;
|
||||
|
||||
const auto *c = nextChar();
|
||||
advance();
|
||||
|
||||
LocationRange loc(getCurrentLocation());
|
||||
|
||||
if (isdigit(*c) == 0) {
|
||||
return errors::makeError(ctx, errors::InvalidDigitForNumber, loc);
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
number += *c;
|
||||
c = nextChar(false);
|
||||
empty = false;
|
||||
|
||||
if ((isdigit(*c) != 0) || *c == '.') {
|
||||
if (*c == '.' && floatNum) {
|
||||
loc = LocationRange(getCurrentLocation());
|
||||
return errors::makeError(ctx, errors::TwoFloatPoints, loc);
|
||||
}
|
||||
|
||||
if (*c == '.') {
|
||||
floatNum = true;
|
||||
}
|
||||
|
||||
advance();
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (((std::isalpha(*c) != 0) && !empty) || empty) {
|
||||
advance();
|
||||
loc.start = getCurrentLocation();
|
||||
return errors::makeError(ctx, errors::InvalidDigitForNumber, loc);
|
||||
}
|
||||
|
||||
loc.end = getCurrentLocation();
|
||||
return exprs::make<exprs::Number>(loc, number, neg, floatNum);
|
||||
};
|
||||
|
||||
/// Reads a symbol. If the symbol looks like a number
|
||||
/// If reads it as number
|
||||
exprs::MaybeNode Reader::readSymbol() {
|
||||
READER_LOG("Reading a symbol...");
|
||||
LocationRange loc;
|
||||
const auto *c = nextChar();
|
||||
|
||||
if (!this->isValidForIdentifier(*c) || isEndOfBuffer(c) ||
|
||||
(isspace(*c) != 0)) {
|
||||
advance();
|
||||
loc = LocationRange(getCurrentLocation());
|
||||
std::string msg;
|
||||
|
||||
if (*c == ')') {
|
||||
msg = "An extra ')' is detected.";
|
||||
}
|
||||
|
||||
return errors::makeError(ctx, errors::InvalidCharacterForSymbol, loc, msg);
|
||||
}
|
||||
|
||||
if (*c == '-') {
|
||||
const auto *next = nextChar(false, 2);
|
||||
if (isdigit(*next) != 0) {
|
||||
// Swallow the -
|
||||
advance();
|
||||
return readNumber(true);
|
||||
}
|
||||
}
|
||||
|
||||
if (isdigit(*c) != 0) {
|
||||
return readNumber(false);
|
||||
}
|
||||
|
||||
std::string sym;
|
||||
advance();
|
||||
|
||||
for (;;) {
|
||||
sym += *c;
|
||||
c = nextChar();
|
||||
|
||||
if (!isEndOfBuffer(c) &&
|
||||
((((isspace(*c)) == 0) && this->isValidForIdentifier(*c)))) {
|
||||
advance();
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: Make sure that the symbol has 0 or 1 '/'.
|
||||
|
||||
// TODO: Make sure that `/` is not at the start or at the end of the symbol
|
||||
|
||||
loc.end = getCurrentLocation();
|
||||
return exprs::makeSuccessfulNode<exprs::Symbol>(loc, sym, this->ns);
|
||||
};
|
||||
|
||||
/// Reads a list recursively
|
||||
exprs::MaybeNode Reader::readList() {
|
||||
READER_LOG("Reading a list...");
|
||||
|
||||
const auto *c = nextChar();
|
||||
advance();
|
||||
|
||||
auto list = exprs::makeAndCast<exprs::List>(getCurrentLocation());
|
||||
|
||||
// TODO: Replace the assert with an actual check.
|
||||
assert(*c == '(');
|
||||
|
||||
bool list_terminated = false;
|
||||
|
||||
do {
|
||||
const auto *c = nextChar(true);
|
||||
|
||||
if (isEndOfBuffer(c)) {
|
||||
advance(true);
|
||||
advance();
|
||||
list->location.end = getCurrentLocation();
|
||||
return errors::makeError(ctx, errors::EOFWhileScaningAList,
|
||||
list->location);
|
||||
}
|
||||
|
||||
switch (*c) {
|
||||
case ')':
|
||||
advance(true);
|
||||
advance();
|
||||
list_terminated = true;
|
||||
list->location.end = getCurrentLocation();
|
||||
break;
|
||||
|
||||
default:
|
||||
advance(true);
|
||||
auto expr = readExpr();
|
||||
if (!expr) {
|
||||
return expr;
|
||||
}
|
||||
|
||||
list->append(*expr);
|
||||
}
|
||||
|
||||
} while (!list_terminated);
|
||||
|
||||
return list;
|
||||
};
|
||||
|
||||
/// Reads an expression by dispatching to the proper reader function.
|
||||
exprs::MaybeNode Reader::readExpr() {
|
||||
const auto *c = nextChar(true);
|
||||
|
||||
READER_LOG("Read char at `readExpr`: " << *c);
|
||||
|
||||
if (isEndOfBuffer(c)) {
|
||||
return exprs::EmptyNode;
|
||||
}
|
||||
|
||||
switch (*c) {
|
||||
case '(': {
|
||||
advance(true);
|
||||
return readList();
|
||||
}
|
||||
|
||||
default:
|
||||
advance(true);
|
||||
return readSymbol();
|
||||
}
|
||||
};
|
||||
|
||||
/// Reads all the expressions in the reader's buffer as an AST.
|
||||
/// Each expression type (from the reader perspective) has a
|
||||
/// reader function.
|
||||
exprs::MaybeAst Reader::read() {
|
||||
|
||||
for (size_t current_pos = 0; current_pos < buf.size();) {
|
||||
const auto *c = nextChar(true);
|
||||
|
||||
if (isEndOfBuffer(c)) {
|
||||
break;
|
||||
}
|
||||
|
||||
advance(true);
|
||||
|
||||
auto tmp = readExpr();
|
||||
|
||||
if (tmp) {
|
||||
if (*tmp == nullptr) {
|
||||
break;
|
||||
}
|
||||
|
||||
this->ast.push_back(std::move(*tmp));
|
||||
|
||||
} else {
|
||||
return tmp.takeError();
|
||||
}
|
||||
}
|
||||
|
||||
return std::move(this->ast);
|
||||
};
|
||||
|
||||
/// Parse the given \p input string via a temporary `Reader` and return the
/// resulting AST or the error.
exprs::MaybeAst read(SereneContext &ctx, const llvm::StringRef input,
                     llvm::StringRef ns,
                     std::optional<llvm::StringRef> filename) {
  reader::Reader r(ctx, input, ns, filename);
  return r.read();
}
|
||||
|
||||
/// Parse the content of the given memory buffer via a temporary `Reader` and
/// return the resulting AST or the error.
exprs::MaybeAst read(SereneContext &ctx, const llvm::MemoryBufferRef input,
                     llvm::StringRef ns,
                     std::optional<llvm::StringRef> filename) {
  reader::Reader r(ctx, input, ns, filename);
  return r.read();
}
|
||||
} // namespace reader
|
||||
} // namespace serene
|
|
@ -0,0 +1,112 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Commentary:
|
||||
* `Reader` is the base parser class and accepts a buffer like object (usually
|
||||
* `llvm::StringRef`) as the input and parses it to create an AST (look at the
|
||||
* `serene::exprs::Expression` class).
|
||||
*
|
||||
* The parsing algorithm is quite simple and it is an LL(2) parser. It means that we
|
||||
* start parsing the input from the very first character and parse the input
|
||||
* one char at a time till we reach the end of the input. Please note that
|
||||
* when we call the `advance` function to move forward in the buffer, we
|
||||
* can't go back. In order to look ahead in the buffer without moving in the
|
||||
* buffer we use the `nextChar` method.
|
||||
*
|
||||
* We have dedicated methods to read different forms like `list`, `symbol`
|
||||
* `number`, etc. Each of them returns a `MaybeNode` that in the success
|
||||
* case contains the node and an `Error` on the failure case.
|
||||
*/
|
||||
|
||||
#ifndef READER_H
|
||||
#define READER_H
|
||||
|
||||
#include "ast.h"
|
||||
#include "location.h"
|
||||
|
||||
#include <llvm/ADT/StringRef.h>
|
||||
#include <llvm/Support/MemoryBufferRef.h>
|
||||
|
||||
namespace serene {
|
||||
/// Base reader class which reads from a string directly.
|
||||
class Reader {
|
||||
private:
|
||||
llvm::StringRef ns;
|
||||
std::optional<llvm::StringRef> filename;
|
||||
|
||||
const char *currentChar = nullptr;
|
||||
|
||||
llvm::StringRef buf;
|
||||
|
||||
/// The position tracker that we will use to determine the end of the
|
||||
/// buffer since the buffer might not be null terminated
|
||||
size_t currentPos = static_cast<size_t>(-1);
|
||||
|
||||
Location currentLocation;
|
||||
|
||||
bool readEOL = false;
|
||||
|
||||
/// Returns a clone of the current location
|
||||
Location getCurrentLocation();
|
||||
/// Returns the next character from the stream.
|
||||
/// @param skip_whitespace An indicator to whether skip white space like chars
|
||||
/// or not
|
||||
void advance(bool skipWhitespace = false);
|
||||
void advanceByOne();
|
||||
|
||||
const char *nextChar(bool skipWhitespace = false, unsigned count = 1);
|
||||
|
||||
/// Returns a boolean indicating whether the given input character is valid
|
||||
/// for an identifier or not.
|
||||
static bool isValidForIdentifier(char c);
|
||||
|
||||
// The property to store the ast tree
|
||||
Ast ast;
|
||||
|
||||
MaybeNode readSymbol();
|
||||
MaybeNode readNumber(bool);
|
||||
MaybeNode readList();
|
||||
MaybeNode readExpr();
|
||||
|
||||
bool isEndOfBuffer(const char *);
|
||||
|
||||
public:
|
||||
Reader(llvm::StringRef buf, llvm::StringRef ns,
|
||||
std::optional<llvm::StringRef> filename);
|
||||
Reader(llvm::MemoryBufferRef buf, llvm::StringRef ns,
|
||||
std::optional<llvm::StringRef> filename);
|
||||
|
||||
// void setInput(const llvm::StringRef string);
|
||||
|
||||
/// Parses the the input and creates a possible AST out of it or errors
|
||||
/// otherwise.
|
||||
MaybeAst read();
|
||||
|
||||
~Reader();
|
||||
};
|
||||
|
||||
/// Parses the given `input` string and returns a `MaybeAst`
|
||||
/// which may contain an AST or an `llvm::Error`
|
||||
MaybeAst read(llvm::StringRef input, llvm::StringRef ns,
|
||||
std::optional<llvm::StringRef> filename);
|
||||
MaybeAst read(llvm::MemoryBufferRef input, llvm::StringRef ns,
|
||||
std::optional<llvm::StringRef> filename);
|
||||
|
||||
} // namespace serene
|
||||
#endif
|
|
@ -0,0 +1,225 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "serene/source_mgr.h"
|
||||
|
||||
#include "serene/namespace.h"
|
||||
#include "serene/reader/location.h"
|
||||
#include "serene/reader/reader.h"
|
||||
#include "serene/utils.h"
|
||||
|
||||
#include <system_error>
|
||||
|
||||
#include <llvm/Support/Error.h>
|
||||
#include <llvm/Support/FormatVariadic.h>
|
||||
#include <llvm/Support/Locale.h>
|
||||
#include <llvm/Support/MemoryBufferRef.h>
|
||||
#include <llvm/Support/Path.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <mlir/Support/LogicalResult.h>
|
||||
|
||||
namespace serene {
|
||||
|
||||
std::string SourceMgr::convertNamespaceToPath(std::string ns_name) {
|
||||
std::replace(ns_name.begin(), ns_name.end(), '.', '/');
|
||||
|
||||
llvm::SmallString<MAX_PATH_SLOTS> path;
|
||||
path.append(ns_name);
|
||||
llvm::sys::path::native(path);
|
||||
|
||||
return std::string(path);
|
||||
};
|
||||
|
||||
bool SourceMgr::isValidBufferID(unsigned i) const {
|
||||
return i != 0 && i <= buffers.size();
|
||||
};
|
||||
|
||||
/// Look for the source file of the namespace \p name in the `loadPaths`, in
/// order, and return the buffer of the first file that can be read.
/// \param name The namespace name to look for
/// \param importedFile Out parameter. It is set to each candidate path that
///        is tried, so on success it holds the path of the loaded file.
/// \return The memory buffer of the file or `nullptr` if none of the load
///         paths contains a readable file for the namespace.
SourceMgr::MemBufPtr SourceMgr::findFileInLoadPath(const std::string &name,
                                                   std::string &importedFile) {

  const auto relativePath = convertNamespaceToPath(name);

  for (const auto &loadPath : loadPaths) {

    // TODO: Ugh, Udgly, fix this using llvm::sys::path functions
    importedFile = loadPath + llvm::sys::path::get_separator().data() +
                   relativePath + "." + DEFAULT_SUFFIX;

    SMGR_LOG("Try to load the ns from: " + importedFile);
    auto candidate = llvm::MemoryBuffer::getFile(importedFile);

    const std::error_code ec = candidate.getError();
    if (ec) {
      // A failure on one load path is not fatal, just try the next one.
      llvm::consumeError(llvm::errorCodeToError(ec));
      continue;
    }

    return std::move(*candidate);
  }

  return nullptr;
}
|
||||
|
||||
/// Find the namespace \p name in the load paths, register its source buffer,
/// parse it and create a namespace out of the parsed AST.
/// \param ctx The context used for creating errors and the namespace
/// \param name The name of the namespace to read
/// \param importLoc The location that the namespace is imported from
/// \return The namespace or the error of the step that failed (lookup,
///         registering the buffer, parsing, or adding the tree).
MaybeNS SourceMgr::readNamespace(SereneContext &ctx, std::string name,
                                 reader::LocationRange importLoc) {
  std::string importedFile;

  SMGR_LOG("Attempt to load namespace: " + name);
  MemBufPtr source(findFileInLoadPath(name, importedFile));

  if (source == nullptr) {
    auto msg = llvm::formatv("Couldn't find namespace '{0}'", name).str();
    return errors::makeError(ctx, errors::NSLoadError, importLoc, msg);
  }

  auto id = AddNewSourceBuffer(std::move(source), importLoc);

  UNUSED(nsTable.insert_or_assign(name, id));

  if (id == 0) {
    auto msg = llvm::formatv("Couldn't add namespace '{0}'", name).str();
    return errors::makeError(ctx, errors::NSAddToSMError, importLoc, msg);
  }

  // The buffer has been moved into the source storage, so fetch a pointer to
  // the stored instance.
  const auto *stored = getMemoryBuffer(id);

  // Hand the content of the buffer to the reader.
  auto parsed = reader::read(ctx, stored->getBuffer(), name,
                             std::optional(llvm::StringRef(importedFile)));

  if (!parsed) {
    SMGR_LOG("Couldn't Read namespace: " + name);
    return parsed.takeError();
  }

  // Create the NS and set the AST
  auto ns =
      ctx.makeNamespace(name, std::optional(llvm::StringRef(importedFile)));

  if (auto errs = ns->addTree(*parsed)) {
    SMGR_LOG("Couldn't set the AST for namespace: " + name);
    return errs;
  }

  return ns;
}
|
||||
|
||||
/// Take the ownership of the given buffer \p f, store it together with its
/// import location and return its id. The id is the number of the buffers
/// after the insertion, which makes the ids 1-based.
unsigned SourceMgr::AddNewSourceBuffer(std::unique_ptr<llvm::MemoryBuffer> f,
                                       reader::LocationRange includeLoc) {
  SrcBuffer entry;
  entry.importLoc = includeLoc;
  entry.buffer    = std::move(f);

  buffers.push_back(std::move(entry));
  return buffers.size();
}
|
||||
|
||||
template <typename T>
|
||||
static std::vector<T> &GetOrCreateOffsetCache(void *&offsetCache,
|
||||
llvm::MemoryBuffer *buffer) {
|
||||
if (offsetCache) {
|
||||
return *static_cast<std::vector<T> *>(offsetCache);
|
||||
}
|
||||
|
||||
// Lazily fill in the offset cache.
|
||||
auto *offsets = new std::vector<T>();
|
||||
size_t sz = buffer->getBufferSize();
|
||||
|
||||
// TODO: Replace this assert with a realtime check
|
||||
assert(sz <= std::numeric_limits<T>::max());
|
||||
|
||||
llvm::StringRef s = buffer->getBuffer();
|
||||
for (size_t n = 0; n < sz; ++n) {
|
||||
if (s[n] == '\n') {
|
||||
offsets->push_back(static_cast<T>(n));
|
||||
}
|
||||
}
|
||||
|
||||
offsetCache = offsets;
|
||||
return *offsets;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
|
||||
unsigned lineNo) const {
|
||||
std::vector<T> &offsets =
|
||||
GetOrCreateOffsetCache<T>(offsetCache, buffer.get());
|
||||
|
||||
// We start counting line and column numbers from 1.
|
||||
if (lineNo != 0) {
|
||||
--lineNo;
|
||||
}
|
||||
|
||||
const char *bufStart = buffer->getBufferStart();
|
||||
|
||||
// The offset cache contains the location of the \n for the specified line,
|
||||
// we want the start of the line. As such, we look for the previous entry.
|
||||
if (lineNo == 0) {
|
||||
return bufStart;
|
||||
}
|
||||
|
||||
if (lineNo > offsets.size()) {
|
||||
return nullptr;
|
||||
}
|
||||
return bufStart + offsets[lineNo - 1] + 1;
|
||||
}
|
||||
|
||||
/// Return a pointer to the first character of the specified line number or
|
||||
/// null if the line number is invalid.
|
||||
const char *
|
||||
SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned lineNo) const {
|
||||
size_t sz = buffer->getBufferSize();
|
||||
if (sz <= std::numeric_limits<uint8_t>::max()) {
|
||||
return getPointerForLineNumberSpecialized<uint8_t>(lineNo);
|
||||
}
|
||||
|
||||
if (sz <= std::numeric_limits<uint16_t>::max()) {
|
||||
return getPointerForLineNumberSpecialized<uint16_t>(lineNo);
|
||||
}
|
||||
|
||||
if (sz <= std::numeric_limits<uint32_t>::max()) {
|
||||
return getPointerForLineNumberSpecialized<uint32_t>(lineNo);
|
||||
}
|
||||
|
||||
return getPointerForLineNumberSpecialized<uint64_t>(lineNo);
|
||||
}
|
||||
|
||||
/// Move constructor. It takes over the buffer and the offset cache of
/// \p other and leaves \p other with a null cache, so the cache won't be
/// released twice.
SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&other) noexcept
    : buffer(std::move(other.buffer)),
      offsetCache(std::exchange(other.offsetCache, nullptr)),
      importLoc(other.importLoc) {}
|
||||
|
||||
/// Release the offset cache, if there is any. The cache is type erased, so
/// the element type is recovered from the size of the buffer, the same way
/// `getPointerForLineNumber` chooses it.
SourceMgr::SrcBuffer::~SrcBuffer() {
  if (offsetCache == nullptr) {
    return;
  }

  const size_t bufSize = buffer->getBufferSize();

  if (bufSize <= std::numeric_limits<uint8_t>::max()) {
    delete static_cast<std::vector<uint8_t> *>(offsetCache);
  } else if (bufSize <= std::numeric_limits<uint16_t>::max()) {
    delete static_cast<std::vector<uint16_t> *>(offsetCache);
  } else if (bufSize <= std::numeric_limits<uint32_t>::max()) {
    delete static_cast<std::vector<uint32_t> *>(offsetCache);
  } else {
    delete static_cast<std::vector<uint64_t> *>(offsetCache);
  }

  offsetCache = nullptr;
}
|
||||
|
||||
}; // namespace serene
|
|
@ -0,0 +1,190 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef SERENE_SOURCE_MGR_H
|
||||
#define SERENE_SOURCE_MGR_H
|
||||
|
||||
#include "location.h"
|
||||
|
||||
#include <llvm/ADT/SmallVector.h>
|
||||
#include <llvm/ADT/StringMap.h>
|
||||
#include <llvm/Support/ErrorHandling.h>
|
||||
#include <llvm/Support/ErrorOr.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#include <llvm/Support/SourceMgr.h>
|
||||
#include <mlir/IR/Diagnostics.h>
|
||||
#include <mlir/Support/Timing.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
/// Debug logging helper for the source manager.
///
/// Forwards to LLVM's `DEBUG_WITH_TYPE` using the "sourcemgr" debug type and
/// streams the arguments to `llvm::dbgs()`, prefixed with "[SMGR]: " and
/// followed by a newline. The arguments are pasted after `<<`, so they may
/// themselves be a chain of `<<` expressions.
///
/// NOTE: the expansion already ends with a semicolon.
#define SMGR_LOG(...)                       \
  DEBUG_WITH_TYPE("sourcemgr", llvm::dbgs() \
                                   << "[SMGR]: " << __VA_ARGS__ << "\n");
|
||||
|
||||
namespace serene {
|
||||
class SereneContext;
|
||||
|
||||
/// This class is quite similar to the `llvm::SourceMgr` in functionality. We
|
||||
/// even borrowed some of the code from the original implementation but removed
|
||||
/// a lot of code that were irrelevant to us.
|
||||
///
|
||||
/// SouceMgr is responsible for finding a namespace in the `loadPaths` and read
|
||||
/// the content of the `.srn` (or any of the `DEFAULT_SUFFIX`) into a
|
||||
/// `llvm::MemoryBuffer` embedded in a `SrcBuffer` object as the owner of the
|
||||
/// source files and then it will call the `reader` on the buffer to parse it
|
||||
/// and create the actual `Namespace` object from the parsed AST.
|
||||
///
|
||||
/// Later on, whenever we need to refer to the source file of a namespace for
|
||||
/// diagnosis purposes or any other purpose we can use the functions in this
|
||||
/// class to get hold of a pointer to a specific `Location` of the
|
||||
/// buffer.
|
||||
///
|
||||
/// Note: Unlike the original version, SourceMgr does not handle the diagnostics
|
||||
/// and it uses the Serene's `DiagnosticEngine` for that matter.
|
||||
class SourceMgr {
|
||||
|
||||
public:
|
||||
// TODO: Make it a vector of supported suffixes
|
||||
constexpr static const char *DEFAULT_SUFFIX = "srn";
|
||||
|
||||
private:
|
||||
struct SrcBuffer {
|
||||
/// The memory buffer for the file.
|
||||
std::unique_ptr<llvm::MemoryBuffer> buffer;
|
||||
|
||||
/// Vector of offsets into Buffer at which there are line-endings
|
||||
/// (lazily populated). Once populated, the '\n' that marks the end of
|
||||
/// line number N from [1..] is at Buffer[OffsetCache[N-1]]. Since
|
||||
/// these offsets are in sorted (ascending) order, they can be
|
||||
/// binary-searched for the first one after any given offset (eg. an
|
||||
/// offset corresponding to a particular SMLoc).
|
||||
///
|
||||
/// Since we're storing offsets into relatively small files (often smaller
|
||||
/// than 2^8 or 2^16 bytes), we select the offset vector element type
|
||||
/// dynamically based on the size of Buffer.
|
||||
mutable void *offsetCache = nullptr;
|
||||
|
||||
/// Look up a given \p ptr in in the buffer, determining which line it came
|
||||
/// from.
|
||||
unsigned getLineNumber(const char *ptr) const;
|
||||
template <typename T>
|
||||
unsigned getLineNumberSpecialized(const char *ptr) const;
|
||||
|
||||
/// Return a pointer to the first character of the specified line number or
|
||||
/// null if the line number is invalid.
|
||||
const char *getPointerForLineNumber(unsigned lineNo) const;
|
||||
|
||||
template <typename T>
|
||||
const char *getPointerForLineNumberSpecialized(unsigned lineNo) const;
|
||||
|
||||
/// This is the location of the parent import or unknown location if it is
|
||||
/// the main namespace
|
||||
LocationRange importLoc;
|
||||
|
||||
SrcBuffer() = default;
|
||||
SrcBuffer(SrcBuffer &&) noexcept;
|
||||
SrcBuffer(const SrcBuffer &) = delete;
|
||||
SrcBuffer &operator=(const SrcBuffer &) = delete;
|
||||
~SrcBuffer();
|
||||
};
|
||||
using MemBufPtr = std::unique_ptr<llvm::MemoryBuffer>;
|
||||
|
||||
/// This is all of the buffers that we are reading from.
|
||||
std::vector<SrcBuffer> buffers;
|
||||
|
||||
/// A hashtable that works as an index from namespace names to the buffer
|
||||
/// position it the `buffer`
|
||||
llvm::StringMap<unsigned> nsTable;
|
||||
|
||||
// This is the list of directories we should search for include files in.
|
||||
std::vector<std::string> loadPaths;
|
||||
|
||||
// Find a namespace file with the given \p name in the load path and \r retuns
|
||||
// a unique pointer to the memory buffer containing the content or an error.
|
||||
// In the success case it will put the path of the file into the \p
|
||||
// importedFile.
|
||||
MemBufPtr findFileInLoadPath(const std::string &name,
|
||||
std::string &importedFile);
|
||||
|
||||
bool isValidBufferID(unsigned i) const;
|
||||
|
||||
/// Converts the ns name to a partial path by replacing the dots with slashes
|
||||
static std::string convertNamespaceToPath(std::string ns_name);
|
||||
|
||||
public:
|
||||
SourceMgr() = default;
|
||||
SourceMgr(const SourceMgr &) = delete;
|
||||
SourceMgr &operator=(const SourceMgr &) = delete;
|
||||
SourceMgr(SourceMgr &&) = default;
|
||||
SourceMgr &operator=(SourceMgr &&) = default;
|
||||
~SourceMgr() = default;
|
||||
|
||||
/// Set the `loadPaths` to the given \p dirs. `loadPaths` is a vector of
|
||||
/// directories that Serene will look in order to find a file that constains a
|
||||
/// namespace which it is looking for.
|
||||
void setLoadPaths(std::vector<std::string> &dirs) { loadPaths.swap(dirs); }
|
||||
|
||||
/// Return a reference to a `SrcBuffer` with the given ID \p i.
|
||||
const SrcBuffer &getBufferInfo(unsigned i) const {
|
||||
assert(isValidBufferID(i));
|
||||
return buffers[i - 1];
|
||||
}
|
||||
|
||||
/// Return a reference to a `SrcBuffer` with the given namspace name \p ns.
|
||||
const SrcBuffer &getBufferInfo(llvm::StringRef ns) const {
|
||||
auto bufferId = nsTable.lookup(ns);
|
||||
|
||||
if (bufferId == 0) {
|
||||
// No such namespace
|
||||
llvm_unreachable("couldn't find the src buffer for a namespace. It "
|
||||
"should never happen.");
|
||||
}
|
||||
|
||||
return buffers[bufferId - 1];
|
||||
}
|
||||
|
||||
/// Return a pointer to the internal `llvm::MemoryBuffer` of the `SrcBuffer`
|
||||
/// with the given ID \p i.
|
||||
const llvm::MemoryBuffer *getMemoryBuffer(unsigned i) const {
|
||||
assert(isValidBufferID(i));
|
||||
return buffers[i - 1].buffer.get();
|
||||
}
|
||||
|
||||
unsigned getNumBuffers() const { return buffers.size(); }
|
||||
|
||||
/// Add a new source buffer to this source manager. This takes ownership of
|
||||
/// the memory buffer.
|
||||
unsigned AddNewSourceBuffer(std::unique_ptr<llvm::MemoryBuffer> f,
|
||||
LocationRange includeLoc);
|
||||
|
||||
/// Lookup for a file containing the namespace definition of with given
|
||||
/// namespace name \p name. In case that the file exists, it returns an
|
||||
/// `ErrorTree`. It will use the parser to read the file and create an AST
|
||||
/// from it. Then create a namespace, set the its AST to the AST that we just
|
||||
/// read from the file and return a shared pointer to the namespace.
|
||||
///
|
||||
/// \p importLoc is a location in the source code where the give namespace is
|
||||
/// imported.
|
||||
MaybeNS readNamespace(SereneContext &ctx, std::string name,
|
||||
LocationRange importLoc);
|
||||
};
|
||||
|
||||
}; // namespace serene
|
||||
|
||||
#endif
|
|
@ -0,0 +1,86 @@
|
|||
/* -*- C -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TYPES_H
|
||||
#define TYPES_H
|
||||
|
||||
#include "serene/config.h"
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
typedef struct {
|
||||
const TypeID id;
|
||||
const char *name;
|
||||
|
||||
} Type;
|
||||
|
||||
typedef struct {
|
||||
const Type type;
|
||||
const void *data;
|
||||
} Object;
|
||||
|
||||
static const Type type = {.id = TYPE, .name = "type"};
|
||||
static const Type nil_type = {.id = NIL, .name = "nil"};
|
||||
static const Type function_type = {.id = FN, .name = "function"};
|
||||
static const Type protocol_type = {.id = PROTOCOL, .name = "protocol"};
|
||||
static const Type int_type = {.id = INT, .name = "int"};
|
||||
static const Type list_type = {.id = LIST, .name = "list"};
|
||||
|
||||
typedef struct {
|
||||
const Type type;
|
||||
const Type **args;
|
||||
const Type *returnType;
|
||||
} FunctionType;
|
||||
|
||||
typedef struct {
|
||||
const Type type;
|
||||
const char *name;
|
||||
const FunctionType **functions;
|
||||
} ProtocolType;
|
||||
|
||||
typedef struct {
|
||||
const Type type;
|
||||
const Type first;
|
||||
const Type second;
|
||||
} PairType;
|
||||
|
||||
typedef struct {
|
||||
const PairType type;
|
||||
void *first;
|
||||
void *second;
|
||||
} Pair;
|
||||
|
||||
typedef struct {
|
||||
const Pair *head;
|
||||
const unsigned int len;
|
||||
} List;
|
||||
|
||||
/* A symbol value, identified by its name. */
typedef struct Symbol {
  const char *name; /* name of the symbol as a C string */
} Symbol;
|
||||
|
||||
/* A string value: a character pointer together with an explicit length. */
typedef struct String {
  const char *data;       /* pointer to the characters */
  const unsigned int len; /* length stored alongside the data */
} String;
|
||||
|
||||
/* A number value holding a single `long`. */
typedef struct Number {
  const long data; /* the numeric value */
} Number;
|
||||
|
||||
#endif
|
|
@ -0,0 +1,138 @@
|
|||
/* -*- C++ -*-
|
||||
* Serene Programming Language
|
||||
*
|
||||
* Copyright (c) 2019-2023 Sameer Rahmani <lxsameer@gnu.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, version 2.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef UTILS_H
|
||||
#define UTILS_H
|
||||
|
||||
#include <llvm/Support/Error.h>
|
||||
|
||||
#include <variant>
|
||||
|
||||
// Sometimes we need this to keep both the static analyzer happy
// and the function signature right.
|
||||
#define UNUSED(x) (void)(x)
|
||||
|
||||
// We use this value with llvm::SmallString<MAX_PATH_SLOTS>
|
||||
#define MAX_PATH_SLOTS 256
|
||||
// C++17 required. We can't go back to 14 any more :))
|
||||
|
||||
namespace serene {
|
||||
|
||||
/// A similar type to Rust's Result data structure. It either holds a value of
|
||||
/// type `T` successfully or holds a value of type `E` errorfully. It is
|
||||
/// designed to be used in situations which the return value of a function might
|
||||
/// contains some errors. The official way to use this type is to use the
|
||||
/// factory functions `Success` and `Error`. For example:
|
||||
///
|
||||
/// \code
|
||||
/// auto successfulResult = Result<int>::success(3);
|
||||
/// auto notOkResult = Result<int>::error(SomeLLVMError());
|
||||
// \endcode
|
||||
///
|
||||
/// In order check for a value being errorful or successful checkout the `ok`
|
||||
/// method or simply use the value as a conditiona.
|
||||
///
|
||||
/// This class is setup in a way tha you can us a value of type `T` in places
|
||||
/// that the compiler expects a `Result<T>`. So for example:
|
||||
///
|
||||
/// \code
|
||||
/// Result<int> fn() {return 2;}
|
||||
/// \endcode
|
||||
///
|
||||
/// works perfectly.
|
||||
template <typename T, typename E = llvm::Error>
|
||||
class Result {
|
||||
|
||||
// The actual data container
|
||||
std::variant<T, E> contents;
|
||||
|
||||
/// The main constructor which we made private to avoid ambiguousness in
|
||||
/// input type. `Success` and `Error` call this ctor.
|
||||
template <typename InPlace, typename Content>
|
||||
Result(InPlace i, Content &&c) : contents(i, std::forward<Content>(c)){};
|
||||
|
||||
public:
|
||||
explicit constexpr Result(const T &v)
|
||||
: Result(std::in_place_index_t<0>(), std::move(v)){};
|
||||
|
||||
/// Return a pointer to the success case value of the result. It is
|
||||
/// important to check for the success case before calling this function.
|
||||
constexpr const T *getPointer() const { return &getValue(); }
|
||||
|
||||
/// Return a pointer to the success case value of the result. It is
|
||||
/// important to check for the success case before calling this function.
|
||||
T *getPointer() { return &getValue(); }
|
||||
|
||||
/// Return a pointer to the success case value of the result. It is
|
||||
/// important to check for the success case before calling this function.
|
||||
T *operator->() { return getPointer(); }
|
||||
|
||||
/// Return a pointer to the success case value of the result. It is
|
||||
/// important to check for the success case before calling this function.
|
||||
constexpr const T *operator->() const { return getPointer(); }
|
||||
|
||||
/// Dereference the success case and returns the value. It is
|
||||
/// important to check for the success case before calling this function.
|
||||
constexpr const T &operator*() const & { return getValue(); }
|
||||
|
||||
/// Dereference the success case and returns the value. It is
|
||||
/// important to check for the success case before calling this function.
|
||||
T &operator*() & { return getValue(); }
|
||||
|
||||
/// Create a succesfull result with the given value of type `T`.
|
||||
static Result success(T v) {
|
||||
return Result(std::in_place_index_t<0>(), std::move(v));
|
||||
}
|
||||
|
||||
/// Create an errorful result with the given value of type `E` (default
|
||||
/// `llvm::Error`).
|
||||
static Result error(E e) {
|
||||
return Result(std::in_place_index_t<1>(), std::move(e));
|
||||
}
|
||||
|
||||
/// Return the value if it's successful otherwise throw an error
|
||||
T &&getValue() && { return std::move(std::get<0>(contents)); };
|
||||
|
||||
/// Return the error value if it's errorful otherwise throw an error
|
||||
E &&getError() && { return std::move(std::get<1>(contents)); };
|
||||
|
||||
// using std::get, it'll throw if contents doesn't contain what you ask for
|
||||
|
||||
/// Return the value if it's successful otherwise throw an error
|
||||
T &getValue() & { return std::get<0>(contents); };
|
||||
|
||||
/// Return the error value if it's errorful otherwise throw an error
|
||||
E &getError() & { return std::get<1>(contents); };
|
||||
|
||||
const T &getValue() const & { return std::get<0>(contents); }
|
||||
const E &getError() const & { return std::get<1>(contents); }
|
||||
|
||||
/// Return the a boolean value indicating whether the value is succesful
|
||||
/// or errorful.
|
||||
bool ok() const { return std::holds_alternative<T>(contents); };
|
||||
|
||||
operator bool() const { return ok(); }
|
||||
};
|
||||
|
||||
/// Build the fully qualified name of a symbol and store it in \p result.
///
/// The name is the namespace name \p ns, a `/` separator and the symbol name
/// \p sym, in that order. Any previous content of \p result is replaced.
inline void makeFQSymbolName(const llvm::StringRef &ns,
                             const llvm::StringRef &sym, std::string &result) {
  // The whole name is rendered into a fresh string before it is assigned.
  result = llvm::Twine(ns).concat("/").concat(sym).str();
}
|
||||
|
||||
} // namespace serene
|
||||
#endif
|
Loading…
Reference in New Issue