Add a source manager and integrate the reader with it

This commit is contained in:
Sameer Rahmani 2021-09-05 15:57:28 +01:00
parent f980da8e4e
commit f357b5e9d3
18 changed files with 1361 additions and 194 deletions

View File

@ -47,7 +47,7 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
add_compile_options(-fno-rtti)
configure_file(${INCLUDE_DIR}/config.h.in config.h)
configure_file(${INCLUDE_DIR}/serene/config.h.in serene/config.h)
# Let's nicely support folders in IDEs
set_property(GLOBAL PROPERTY USE_FOLDERS ON)

View File

@ -24,6 +24,7 @@
#include "serene/serene.h"
#include "serene/config.h"
#include "serene/context.h"
#include "serene/jit.h"
#include "serene/namespace.h"
@ -72,19 +73,26 @@ enum Action {
RunJIT,
};
}
static std::string banner =
llvm::formatv("\n\nSerene Compiler Version {0}"
"\nCopyright (C) 2019-2021 "
"Sameer Rahmani <lxsameer@gnu.org>\n"
"Serene comes with ABSOLUTELY NO WARRANTY;\n"
"This is free software, and you are welcome\n"
"to redistribute it under certain conditions; \n"
"for details take a look at the LICENSE file.\n",
SERENE_VERSION);
static cl::opt<std::string> inputFile(cl::Positional,
cl::desc("The Serene file to compile"),
cl::init("-"),
cl::value_desc("filename"));
static cl::opt<std::string> inputNS(cl::Positional, cl::desc("<namespace>"),
cl::Required);
static cl::opt<std::string> outputFile(
"o", cl::desc("The relative path to the output file from the build dir"),
cl::init("-"), cl::value_desc("filename"));
static cl::opt<std::string>
outputDir("build-dir", cl::desc("The absolute path to the build directory"),
cl::init("-"), cl::value_desc("filename"));
outputDir("b", cl::desc("The absolute path to the build directory"),
cl::value_desc("filename"), cl::Required);
static cl::opt<enum Action> emitAction(
"emit", cl::desc("Select what to dump."), cl::init(Compile),
@ -106,26 +114,11 @@ static cl::opt<enum Action> emitAction(
);
exprs::Ast readInputFile() {
auto r = make_unique<reader::FileReader>(inputFile);
auto maybeAst = r->read();
if (!maybeAst) {
throw std::move(maybeAst.getError());
}
return maybeAst.getValue();
};
exprs::Ast readAndAnalyze(SereneContext &ctx) {
auto ast = readInputFile();
auto afterAst = reader::analyze(ctx, ast);
if (!afterAst) {
throw std::move(afterAst.getError());
}
return afterAst.getValue();
};
llvm::cl::OptionCategory clOptionsCategory{"Discovery options"};
static cl::list<std::string>
loadPaths("l", cl::desc("The load path to use for compilation."),
llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::PositionalEatsArgs,
llvm::cl::cat(clOptionsCategory));
int dumpAsObject(Namespace &ns) {
// TODO: Move the compilation process to the Namespace class
@ -237,11 +230,20 @@ int main(int argc, char *argv[]) {
llvm::InitializeAllAsmParsers();
llvm::InitializeAllAsmPrinters();
cl::ParseCommandLineOptions(argc, argv, "Serene compiler \n");
auto ctx = makeSereneContext();
auto ns = makeNamespace(*ctx, "user", llvm::None);
cl::ParseCommandLineOptions(argc, argv, banner);
auto ctx = makeSereneContext();
auto userNS = makeNamespace(*ctx, "user", llvm::None);
// TODO: We might want to find a better place for this
applyPassManagerCLOptions(ctx->pm);
ctx->sourceManager.setLoadPaths(loadPaths);
auto runLoc = llvm::SMLoc();
auto ns = ctx->sourceManager.readNamespace(*ctx, inputNS, runLoc, true);
if (!ns) {
return (int)std::errc::no_such_file_or_directory;
}
// TODO: Do not force the user to provide the outputDir. It should
// default to the current working directory.
@ -261,13 +263,21 @@ int main(int argc, char *argv[]) {
// Just print out the raw AST
case Action::DumpAST: {
auto ast = readInputFile();
auto ast = ns->getTree();
llvm::outs() << exprs::astToString(&ast) << "\n";
return 0;
};
case Action::DumpSemantic: {
auto ast = readAndAnalyze(*ctx);
auto ast = ns->getTree();
auto afterAst = reader::analyze(*ctx, ast);
if (!afterAst) {
throw std::move(afterAst.getError());
}
ast = afterAst.getValue();
llvm::outs() << exprs::astToString(&ast) << "\n";
return 0;
};
@ -308,9 +318,13 @@ int main(int argc, char *argv[]) {
}
}
auto afterAst = readAndAnalyze(*ctx);
auto isSet = ns->setTree(afterAst);
// Perform the semantic analysis
auto afterAst = reader::analyze(*ctx, ns->getTree());
if (!afterAst) {
throw std::move(afterAst.getError());
}
auto isSet = ns->setTree(afterAst.getValue());
if (isSet.succeeded()) {
ctx->insertNS(ns);
switch (emitAction) {

12
builder
View File

@ -52,7 +52,7 @@ function popd_build() {
function compile() {
pushed_build
ninja -j "$(nproc)"
cmake --build .
popd_build
}
@ -61,28 +61,28 @@ function build() {
echo "Running: "
echo "cmake -G Ninja $CMAKE_CCACHE -DCMAKE_BUILD_TYPE=Debug \"$@\" \"$ROOT_DIR\""
cmake -G Ninja $CMAKE_CCACHE -DCMAKE_BUILD_TYPE=Debug "$@" "$ROOT_DIR"
ninja -j "$(nproc)"
cmake --build .
popd_build
}
function build-20() {
pushed_build
cmake -G Ninja $CMAKE_CCACHE -DCMAKE_BUILD_TYPE=Debug -DCPP_20_SUPPORT=ON "$@" "$ROOT_DIR"
ninja -j "$(nproc)"
cmake --build .
popd_build
}
function build-release() {
pushed_build
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release "$ROOT_DIR"
ninja -j "$(nproc)"
cmake --build .
popd_build
}
function build-docs() {
pushed_build
cmake -G Ninja $CMAKE_CCACHE -DCMAKE_BUILD_TYPE=Docs "$ROOT_DIR"
ninja -j "$(nproc)"
cmake --build .
popd_build
}
@ -111,7 +111,7 @@ function run-tests() {
function tests() {
pushed_build
cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DBUILD_TESTING=ON "$ROOT_DIR"
ninja -j "$(nproc)"
cmake --build .
popd_build
}

View File

@ -1,2 +1,4 @@
(def main (fn () 3))
(def main1 (fn (v y n) 3))
ht
(- 3 4 (sh - r e 3bea -32) ((((())))))

View File

@ -375,3 +375,4 @@ define i64 @main1(i64 %0, i64 %1, i64 %2) !dbg !9 {
- https://mlir.llvm.org/docs
- https://mlir.llvm.org/docs/LangRef
- https://en.wikipedia.org/wiki/Basic_block

View File

@ -1,8 +1,7 @@
#ifndef CONFIG_H
#define CONFIG_H
// the configured options and settings for Tutorial
#define SERENE_VERSION_MAJOR @Serene_VERSION_MAJOR@
#define SERENE_VERSION_MINOR @Serene_VERSION_MINOR@
#define SERENE_VERSION "@PROJECT_VERSION@"
#cmakedefine ENABLE_READER_LOG
#cmakedefine ENABLE_EXPR_LOG

View File

@ -29,6 +29,10 @@
#include "serene/namespace.h"
#include "serene/passes.h"
#include "serene/slir/dialect.h"
#include "serene/source_mgr.h"
#include "llvm/ADT/Optional.h"
#include "llvm/Support/SMLoc.h"
#include <llvm/ADT/StringRef.h>
#include <llvm/IR/LLVMContext.h>
@ -67,6 +71,11 @@ public:
llvm::LLVMContext llvmContext;
mlir::MLIRContext mlirContext;
mlir::PassManager pm;
/// The source manager is responsible for loading namespaces and practically
/// managing the source code in form of memory buffers.
SourceMgr sourceManager;
std::string targetTriple;
/// Insert the given `ns` into the context. The Context object is
@ -104,6 +113,9 @@ public:
CompilationPhase getTargetPhase() { return targetPhase; };
int getOptimizatioLevel();
NSPtr readNamespace(std::string name);
NSPtr readNamespace(std::string name, llvm::SMLoc loc);
private:
CompilationPhase targetPhase;

View File

@ -115,13 +115,14 @@ public:
~Namespace();
};
using NSPtr = std::shared_ptr<Namespace>;
/// Create a new namespace with the given `name` and optional `filename` and
/// return a shared pointer to it in the given Serene context. If the
/// `setCurrent` argument is set to true, the created NS will become the current
/// namespace in the context.
std::shared_ptr<Namespace>
makeNamespace(SereneContext &ctx, llvm::StringRef name,
llvm::Optional<llvm::StringRef> filename, bool setCurrent = true);
NSPtr makeNamespace(SereneContext &ctx, llvm::StringRef name,
llvm::Optional<llvm::StringRef> filename,
bool setCurrent = true);
} // namespace serene

View File

@ -37,12 +37,21 @@ namespace reader {
/// It represents a location in the input string to the parser via `line`,
struct Location {
int pos; // Position of in the input string.
int line;
int col;
/// A pointer to the character that this location is pointing to
/// in the input buffer
const char *c;
/// The id of the buffer that this location belongs to.
unsigned bufferId;
/// At this stage we only support 65535 lines of code in each file
unsigned short int line;
/// At this stage we only support 65535 chars in each line
unsigned short int col;
::std::string toString() const;
static Location unit() { return {0, 0, 0}; };
Location() = default;
Location clone();
};
class LocationRange {
@ -50,14 +59,14 @@ public:
Location start;
Location end;
LocationRange() : start(Location{0, 0, 0}), end(Location{0, 0, 0}){};
LocationRange() = default;
LocationRange(Location _start) : start(_start), end(_start){};
LocationRange(Location _start, Location _end) : start(_start), end(_end){};
LocationRange(const LocationRange &);
};
void incLocation(Location &, bool);
void decLocation(Location &, bool);
void incLocation(Location &, const char *);
void decLocation(Location &, const char *);
} // namespace reader
} // namespace serene

View File

@ -34,6 +34,8 @@
#include "serene/serene.h"
#include <llvm/Support/Debug.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/MemoryBufferRef.h>
#include <llvm/Support/raw_ostream.h>
#include <memory>
#include <sstream>
@ -51,14 +53,24 @@ namespace serene::reader {
/// Base reader class which reads from a string directly.
class Reader {
private:
char current_char = ';'; // Some arbitary char to begin with
std::stringstream input_stream;
Location current_location{0, 0, 0};
SereneContext &ctx;
const char *current_char = NULL;
llvm::StringRef buf;
/// The position tracker that we will use to determine the end of the
/// buffer since the buffer might not be null terminated
size_t current_pos = -1;
Location current_location;
/// Returns a clone of the current location
Location getCurrentLocation();
/// Returns the next character from the stream.
/// @param skip_whitespace An indicator to whether skip white space like chars
/// or not
char getChar(bool skip_whitespace);
const char *getChar(bool skip_whitespace);
/// Unreads the current character by moving the char pointer to the previous
/// char.
@ -76,11 +88,13 @@ private:
exprs::Node readList();
exprs::Node readExpr();
public:
Reader() : input_stream(""){};
Reader(const llvm::StringRef string);
bool isEndOfBuffer(const char *);
void setInput(const llvm::StringRef string);
public:
Reader(SereneContext &ctx, llvm::StringRef buf);
Reader(SereneContext &ctx, llvm::MemoryBufferRef buf);
// void setInput(const llvm::StringRef string);
/// Parses the input and creates a possible AST out of it or errors
/// otherwise.
@ -89,23 +103,9 @@ public:
~Reader();
};
/// A reader to read the content of a file as AST
class FileReader {
std::string file;
Reader *reader;
public:
FileReader(const std::string file_name)
: file(file_name), reader(new Reader()) {}
Result<exprs::Ast> read();
~FileReader();
};
/// Parses the given `input` string and returns a `Result<ast>`
/// which may contain an AST or an `llvm::Error`
Result<exprs::Ast> read(llvm::StringRef input);
Result<exprs::Ast> read(SereneContext &ctx, const llvm::StringRef input);
Result<exprs::Ast> read(SereneContext &ctx, const llvm::MemoryBufferRef but);
} // namespace serene::reader
#endif

View File

@ -25,9 +25,12 @@
#ifndef SERENE_H
#define SERENE_H
#include "serene/context.h"
#include "serene/source_mgr.h"
// Sometimes we need this to make both analyzer happy
// and the fn signature right.
#define UNUSED(x) (void)(x)
namespace serene {}
namespace serene {} // namespace serene
#endif

View File

@ -0,0 +1,122 @@
/* -*- C++ -*-
* Serene programming language.
*
* Copyright (c) 2019-2021 Sameer Rahmani <lxsameer@gnu.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef SERENE_SLIR_TRAITS_H
#define SERENE_SLIR_TRAITS_H
#include "serene/slir/dialect.h"
#include "serene/traits.h"
#include <llvm/ADT/STLExtras.h>
#include <llvm/IR/Module.h>
#include <llvm/Support/Casting.h>
#include <llvm/Support/Error.h>
#include <llvm/Support/TargetSelect.h>
#include <mlir/ExecutionEngine/ExecutionEngine.h>
#include <mlir/ExecutionEngine/OptUtils.h>
#include <mlir/IR/BuiltinOps.h>
#include <mlir/IR/MLIRContext.h>
#include <mlir/Support/LogicalResult.h>
#include <mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h>
#include <mlir/Target/LLVMIR/ModuleTranslation.h>
#include <stdexcept>
#include <utility>
namespace serene {
class Namespace;
class SereneContext;
} // namespace serene
namespace serene::slir {
/// Trait (built on `TraitBase`) for types that expose a
/// `generateIR(serene::Namespace &)` member. The trait itself holds no state;
/// it only forwards to the object returned by `Object()`.
///
/// Copy construction is deleted.
template <typename T>
class GeneratableUnit : public TraitBase<T, GeneratableUnit> {
public:
  GeneratableUnit() {}
  GeneratableUnit(const GeneratableUnit &) = delete;

  /// Ask the underlying object to generate its IR in the namespace `ns`.
  /// \param ns The namespace handed over to `generateIR`.
  void generate(serene::Namespace &ns) {
    this->Object().generateIR(ns);
  }
};
/// Trait (built on `TraitBase`) for types that can generate a module.
/// Every member simply forwards to the member with the same name on the
/// object returned by `Object()`, so the implementing type has to provide
/// `generate`, `runPasses`, `getModule`, `getContext` and `dump`.
///
/// Copy construction is deleted.
template <typename T>
class Generatable : public TraitBase<T, Generatable> {
public:
  Generatable() {}
  Generatable(const Generatable &) = delete;

  /// Forwards to `generate()` of the underlying object.
  mlir::LogicalResult generate() {
    return this->Object().generate();
  }

  /// Forwards to `runPasses()` of the underlying object.
  mlir::LogicalResult runPasses() {
    return this->Object().runPasses();
  }

  /// Returns a reference to the MLIR module of the underlying object.
  mlir::ModuleOp &getModule() {
    return this->Object().getModule();
  }

  /// Returns a reference to the Serene context of the underlying object.
  serene::SereneContext &getContext() {
    return this->Object().getContext();
  }

  /// Forwards to `dump()` of the underlying object.
  void dump() {
    this->Object().dump();
  }
};
/// Free-function entry point for the `Generatable` trait: calls
/// `t.generate()` and hands its result back to the caller.
/// \param t The generatable object.
/// \return Whatever `t.generate()` returned.
template <typename T>
mlir::LogicalResult generate(Generatable<T> &t) {
  mlir::LogicalResult outcome = t.generate();
  return outcome;
}
/// Translate the MLIR module of the given generatable `t` to an LLVM IR
/// module and run the LLVM optimization pipeline over it.
///
/// The translation uses the `mlirContext` and `llvmContext` of the context
/// returned by `t.getContext()`, and the optimization level is taken from
/// `getOptimizatioLevel()` of that same context.
///
/// \param t The generatable object that owns the MLIR module.
/// \return The translated (and optimized) LLVM module.
/// \throws std::runtime_error if the translation to LLVM IR or the
///         optimization pipeline fails.
template <typename T>
std::unique_ptr<llvm::Module> toLLVMIR(Generatable<T> &t) {
  auto &module = t.getModule();
  auto &ctx    = t.getContext();

  // Register the translation to LLVM IR with the MLIR context.
  mlir::registerLLVMDialectTranslation(ctx.mlirContext);

  // Convert the module to LLVM IR in a new LLVM IR context.
  auto llvmModule = mlir::translateModuleToLLVMIR(module, ctx.llvmContext);
  if (!llvmModule) {
    // TODO: Return a Result type instead
    llvm::errs() << "Failed to emit LLVM IR\n";
    throw std::runtime_error("Failed to emit LLVM IR\n");
  }

  // Initialize LLVM targets.
  llvm::InitializeNativeTarget();
  llvm::InitializeNativeTargetAsmPrinter();

  // TODO: replace this call with our own version of setupTargetTriple
  mlir::ExecutionEngine::setupTargetTriple(llvmModule.get());

  // Optionally run an optimization pipeline over the llvm module.
  auto optPipeline = mlir::makeOptimizingTransformer(
      /*optLevel=*/ctx.getOptimizatioLevel(), /*sizeLevel=*/0,
      /*targetMachine=*/nullptr);

  if (auto err = optPipeline(llvmModule.get())) {
    // An `llvm::Error` in the failure state has to be consumed before it is
    // destroyed, otherwise LLVM aborts the process (in builds with the
    // unchecked-error checks enabled) while the exception below unwinds the
    // stack. `llvm::toString` consumes the error and gives us its message.
    llvm::errs() << "Failed to optimize LLVM IR "
                 << llvm::toString(std::move(err)) << "\n";
    throw std::runtime_error("Failed to optimize LLVM IR");
  }

  // Returning the local by name lets the compiler elide/move it; wrapping it
  // in `std::move` is redundant.
  return llvmModule;
}
/// Free-function entry point for the `Generatable` trait: asks `t` to dump
/// itself by calling `t.dump()`.
/// \param t The generatable object to dump.
template <typename T>
void dump(Generatable<T> &t) {
  t.dump();
}
} // namespace serene::slir
#endif

299
include/serene/source_mgr.h Normal file
View File

@ -0,0 +1,299 @@
/* -*- C++ -*-
* Serene programming language.
*
* Copyright (c) 2019-2021 Sameer Rahmani <lxsameer@gnu.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef SERENE_SOURCE_MGR_H
#define SERENE_SOURCE_MGR_H
#include "serene/namespace.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include <llvm/Support/SourceMgr.h>
#include <memory>
// Debug logging helper for the source manager. It wraps `DEBUG_WITH_TYPE`
// with the debug type "sourcemgr" and streams the "[SMGR]: " prefix, the
// given arguments and a trailing newline to `llvm::dbgs()`.
//
// Note that the expansion already ends with a `;`.
#define SMGR_LOG(...) \
DEBUG_WITH_TYPE("sourcemgr", llvm::dbgs() \
<< "[SMGR]: " << __VA_ARGS__ << "\n");
namespace serene {
class SereneContext;
class SMDiagnostic;
/// Owns the memory buffers of the loaded sources and provides location
/// lookup and diagnostic printing helpers on top of them.
///
/// Buffer ids handed out by this class are 1-based; 0 is never a valid id
/// (see `isValidBufferID`). The class is movable but not copyable.
///
/// NOTE(review): the interface looks like it is modelled on
/// `llvm::SourceMgr` — confirm against source_mgr.cpp.
class SourceMgr {
public:
/// NOTE(review): presumably the file extension used when looking up the
/// file of a namespace — confirm against source_mgr.cpp.
std::string DEFAULT_SUFFIX = "srn";
/// The kinds of diagnostics that can be reported.
enum DiagKind {
DK_Error,
DK_Warning,
DK_Remark,
DK_Note,
};
/// Clients that want to handle their own diagnostics in a custom way can
/// register a function pointer+context as a diagnostic handler.
/// It gets called each time PrintMessage is invoked.
using DiagHandlerTy = void (*)(const SMDiagnostic &, void *context);
private:
/// A single source buffer together with its bookkeeping data. Copying is
/// deleted; a move constructor is declared.
struct SrcBuffer {
/// The memory buffer for the file.
std::unique_ptr<llvm::MemoryBuffer> buffer;
/// Vector of offsets into `buffer` at which there are line-endings
/// (lazily populated). Once populated, the '\n' that marks the end of
/// line number N from [1..] is at buffer[offsetCache[N-1]]. Since
/// these offsets are in sorted (ascending) order, they can be
/// binary-searched for the first one after any given offset (eg. an
/// offset corresponding to a particular SMLoc).
///
/// Since we're storing offsets into relatively small files (often smaller
/// than 2^8 or 2^16 bytes), we select the offset vector element type
/// dynamically based on the size of `buffer`.
mutable void *offsetCache = nullptr;
/// Look up a given \p ptr in the buffer, determining which line it came
/// from.
unsigned getLineNumber(const char *ptr) const;
/// NOTE(review): presumably the implementation of `getLineNumber` for an
/// offset cache whose element type is `T` — confirm.
template <typename T>
unsigned getLineNumberSpecialized(const char *ptr) const;
/// Return a pointer to the first character of the specified line number or
/// null if the line number is invalid.
const char *getPointerForLineNumber(unsigned lineNo) const;
/// NOTE(review): presumably the implementation of
/// `getPointerForLineNumber` for an offset cache whose element type is
/// `T` — confirm.
template <typename T>
const char *getPointerForLineNumberSpecialized(unsigned lineNo) const;
/// This is the location of the parent include, or null if at the top level.
llvm::SMLoc includeLoc;
SrcBuffer() = default;
SrcBuffer(SrcBuffer &&);
SrcBuffer(const SrcBuffer &) = delete;
SrcBuffer &operator=(const SrcBuffer &) = delete;
~SrcBuffer();
};
/// This is all of the buffers that we are reading from.
std::vector<SrcBuffer> buffers;
// This is the list of directories we should search for include files in.
std::vector<std::string> loadPaths;
/// The registered diagnostic handler and the opaque context pointer that
/// was registered with it (see `setDiagHandler`). Both default to null.
DiagHandlerTy diagHandler = nullptr;
void *diagContext = nullptr;
/// A buffer id is valid when it is non-zero and not larger than the number
/// of buffers, i.e. ids are 1-based.
bool isValidBufferID(unsigned i) const { return i && i <= buffers.size(); }
/// Converts the ns name to a partial path by replacing the dots with slashes
std::string inline convertNamespaceToPath(std::string ns_name);
public:
// Default constructible and movable, but not copyable.
SourceMgr() = default;
SourceMgr(const SourceMgr &) = delete;
SourceMgr &operator=(const SourceMgr &) = delete;
SourceMgr(SourceMgr &&) = default;
SourceMgr &operator=(SourceMgr &&) = default;
~SourceMgr() = default;
/// Replace the current list of load paths with a copy of \p dirs.
void setLoadPaths(const std::vector<std::string> &dirs) { loadPaths = dirs; }
/// Specify a diagnostic handler to be invoked every time PrintMessage is
/// called. \p ctx is passed into the handler when it is invoked.
void setDiagHandler(DiagHandlerTy dh, void *ctx = nullptr) {
diagHandler = dh;
diagContext = ctx;
}
/// Return the registered diagnostic handler (null if none was set).
DiagHandlerTy getDiagHandler() const { return diagHandler; }
/// Return the context pointer registered with the diagnostic handler.
void *getDiagContext() const { return diagContext; }
/// Return the `SrcBuffer` with the (1-based) id \p i. The id is only
/// checked by an `assert`.
const SrcBuffer &getBufferInfo(unsigned i) const {
assert(isValidBufferID(i));
return buffers[i - 1];
}
/// Return the memory buffer with the (1-based) id \p i. The id is only
/// checked by an `assert`.
const llvm::MemoryBuffer *getMemoryBuffer(unsigned i) const {
assert(isValidBufferID(i));
return buffers[i - 1].buffer.get();
}
/// Return the number of buffers that were added so far.
unsigned getNumBuffers() const { return buffers.size(); }
/// Return the id of the main file, which is always 1. Asserts that at
/// least one buffer has been added.
unsigned getMainFileID() const {
assert(getNumBuffers());
return 1;
}
/// Return the `includeLoc` that was stored for the buffer with the
/// (1-based) id \p i. The id is only checked by an `assert`.
llvm::SMLoc getParentIncludeLoc(unsigned i) const {
assert(isValidBufferID(i));
return buffers[i - 1].includeLoc;
}
/// Add a new source buffer to this source manager. This takes ownership of
/// the memory buffer.
///
/// \return The id of the new buffer, which is the number of buffers after
/// the insertion (ids are 1-based).
unsigned AddNewSourceBuffer(std::unique_ptr<llvm::MemoryBuffer> f,
llvm::SMLoc includeLoc) {
SrcBuffer nb;
nb.buffer = std::move(f);
nb.includeLoc = includeLoc;
buffers.push_back(std::move(nb));
return buffers.size();
}
/// Search for a file with the specified name in the current directory or in
/// one of the include directories (presumably `loadPaths` — TODO confirm).
///
/// If no file is found, this returns 0, otherwise it returns the buffer ID
/// of the stacked file. The full path to the included file can be found in
/// \p includedFile.
unsigned AddIncludeFile(const std::string &filename, llvm::SMLoc includeLoc,
std::string &includedFile);
/// Read the namespace called \p name and return a pointer to it.
///
/// NOTE(review): the implementation is not visible from this header.
/// Callers check the result for null, so a null `NSPtr` presumably signals
/// that the namespace could not be loaded; \p importLoc is presumably the
/// location of the import that triggered the read and \p entryNS presumably
/// marks the entry point namespace — confirm against source_mgr.cpp.
NSPtr readNamespace(SereneContext &ctx, std::string name,
llvm::SMLoc importLoc, bool entryNS = false);
/// Return the ID of the buffer containing the specified location.
///
/// 0 is returned if the buffer is not found.
unsigned FindBufferContainingLoc(llvm::SMLoc loc) const;
/// Find the line number for the specified location in the specified file.
/// This is not a fast method.
unsigned FindLineNumber(llvm::SMLoc loc, unsigned bufferID = 0) const {
return getLineAndColumn(loc, bufferID).first;
}
/// Find the line and column number for the specified location in the
/// specified file. This is not a fast method.
std::pair<unsigned, unsigned> getLineAndColumn(llvm::SMLoc loc,
unsigned bufferID = 0) const;
/// Get a string with the \p llvm::SMLoc filename and line number
/// formatted in the standard style.
std::string getFormattedLocationNoOffset(llvm::SMLoc loc,
bool includePath = false) const;
/// Given a line and column number in a mapped buffer, turn it into an
/// llvm::SMLoc. This will return a null llvm::SMLoc if the line/column
/// location is invalid.
llvm::SMLoc FindLocForLineAndColumn(unsigned bufferID, unsigned lineNo,
unsigned colNo);
/// Emit a message about the specified location with the specified string.
///
/// \param showColors Display colored messages if output is a terminal and
/// the default error handler is used.
void PrintMessage(llvm::raw_ostream &os, llvm::SMLoc loc, DiagKind kind,
const llvm::Twine &msg,
llvm::ArrayRef<llvm::SMRange> ranges = {},
llvm::ArrayRef<llvm::SMFixIt> fixIts = {},
bool showColors = true) const;
/// Emits a diagnostic to llvm::errs().
void PrintMessage(llvm::SMLoc loc, DiagKind kind, const llvm::Twine &msg,
llvm::ArrayRef<llvm::SMRange> ranges = {},
llvm::ArrayRef<llvm::SMFixIt> fixIts = {},
bool showColors = true) const;
/// Emits a manually-constructed diagnostic to the given output stream.
///
/// \param showColors Display colored messages if output is a terminal and
/// the default error handler is used.
void PrintMessage(llvm::raw_ostream &os, const SMDiagnostic &diagnostic,
bool showColors = true) const;
/// Return an SMDiagnostic at the specified location with the specified
/// string.
///
/// \param kind The kind of the diagnostic (e.g. `DK_Error`).
/// \param msg The text of the message.
SMDiagnostic GetMessage(llvm::SMLoc loc, DiagKind kind,
const llvm::Twine &msg,
llvm::ArrayRef<llvm::SMRange> ranges = {},
llvm::ArrayRef<llvm::SMFixIt> fixIts = {}) const;
/// Prints the names of included files and the line of the file they were
/// included from. A diagnostic handler can use this before printing its
/// custom formatted message.
///
/// \param includeLoc The location of the include.
/// \param os the raw_ostream to print on.
void PrintIncludeStack(llvm::SMLoc includeLoc, llvm::raw_ostream &os) const;
};
/// Instances of this class encapsulate one diagnostic report, allowing
/// printing to a raw_ostream as a caret diagnostic.
///
/// A default constructed instance is the "null" diagnostic: no source
/// manager, line and column set to 0 and the kind set to `DK_Error`.
class SMDiagnostic {
// The source manager the diagnostic belongs to (null when there is none).
const SourceMgr *sm = nullptr;
// The location the diagnostic points to.
llvm::SMLoc loc;
// The name of the file the diagnostic is about.
std::string filename;
// Line and column of the diagnostic. The location-less constructor sets
// both of them to -1.
int lineNo = 0;
int columnNo = 0;
SourceMgr::DiagKind kind = SourceMgr::DK_Error;
// The message of the diagnostic and the contents of the line it refers to.
std::string message, lineContents;
// NOTE(review): presumably pairs of column offsets within `lineContents`
// that should be highlighted — confirm against source_mgr.cpp.
std::vector<std::pair<unsigned, unsigned>> ranges;
llvm::SmallVector<llvm::SMFixIt, 4> fixIts;
public:
// Null diagnostic.
SMDiagnostic() = default;
// Diagnostic with no location (e.g. file not found, command line arg error).
// The line and the column number are set to -1.
SMDiagnostic(llvm::StringRef filename, SourceMgr::DiagKind knd,
llvm::StringRef msg)
: filename(filename), lineNo(-1), columnNo(-1), kind(knd), message(msg) {}
// Diagnostic with a location.
SMDiagnostic(const SourceMgr &sm, llvm::SMLoc l, llvm::StringRef fn, int line,
int col, SourceMgr::DiagKind kind, llvm::StringRef msg,
llvm::StringRef lineStr,
llvm::ArrayRef<std::pair<unsigned, unsigned>> ranges,
llvm::ArrayRef<llvm::SMFixIt> fixIts = {});
// Plain accessors for the fields above. The `StringRef`/`ArrayRef` results
// are views into this object and do not own the data they point to.
const SourceMgr *getSourceMgr() const { return sm; }
llvm::SMLoc getLoc() const { return loc; }
llvm::StringRef getFilename() const { return filename; }
int getLineNo() const { return lineNo; }
int getColumnNo() const { return columnNo; }
SourceMgr::DiagKind getKind() const { return kind; }
llvm::StringRef getMessage() const { return message; }
llvm::StringRef getLineContents() const { return lineContents; }
llvm::ArrayRef<std::pair<unsigned, unsigned>> getRanges() const {
return ranges;
}
// Append the given fix-it hint to the list of fix-its of this diagnostic.
void addFixIt(const llvm::SMFixIt &hint) { fixIts.push_back(hint); }
llvm::ArrayRef<llvm::SMFixIt> getFixIts() const { return fixIts; }
// Print the diagnostic to the stream `s`.
// NOTE(review): `progName` is presumably printed as a prefix and
// `showKindLabel` presumably toggles the "error:"/"warning:" label —
// confirm against source_mgr.cpp.
void print(const char *progName, llvm::raw_ostream &s, bool showColors = true,
bool showKindLabel = true) const;
};
}; // namespace serene
#endif

View File

@ -34,7 +34,7 @@ set(HEADER_LIST
"${INCLUDE_DIR}/serene/slir/utils.h"
"${INCLUDE_DIR}/serene/namespace.h"
"${INCLUDE_DIR}/serene/jit.h"
"${INCLUDE_DIR}/serene/source_mgr.h"
"${INCLUDE_DIR}/serene/passes.h")
# Make an automatic library - will be static or dynamic based on user setting
@ -52,6 +52,7 @@ add_library(serene
serene.cpp
namespace.cpp
jit.cpp
source_mgr.cpp
# Reader
reader/reader.cpp
@ -112,7 +113,7 @@ target_link_libraries(serene
MLIRTransforms
${llvm_libs})
target_precompile_headers(serene
PRIVATE
<serene_precompiles.h>
)
# target_precompile_headers(serene
# PRIVATE
# <serene_precompiles.h>
# )

View File

@ -92,7 +92,17 @@ int SereneContext::getOptimizatioLevel() {
return 3;
}
NSPtr SereneContext::readNamespace(std::string name) {
llvm::SMLoc loc;
return readNamespace(name, loc);
};
NSPtr SereneContext::readNamespace(std::string name, llvm::SMLoc loc) {
return sourceManager.readNamespace(*this, name, loc);
}
std::unique_ptr<SereneContext> makeSereneContext() {
return std::make_unique<SereneContext>();
};
}; // namespace serene

View File

@ -24,9 +24,8 @@
#include "serene/reader/location.h"
#include "mlir/IR/Identifier.h"
#include "llvm/Support/FormatVariadic.h"
#include <llvm/Support/FormatVariadic.h>
#include <mlir/IR/Identifier.h>
namespace serene {
namespace reader {
@ -38,33 +37,41 @@ LocationRange::LocationRange(const LocationRange &loc) {
/// Return the string representation of the location.
std::string Location::toString() const {
return llvm::formatv("{0}:{1}:{2}", line, col, pos);
return llvm::formatv("{0}:{1}", line, col);
};
Location Location::clone() { return Location{c, bufferId, line, col}; }
/// Increase the given location by one and set the line/col value in respect to
/// the `newline` in place.
/// \param loc The `Location` data
/// \param newline Whether or not we reached a new line
void incLocation(Location &loc, bool newline) {
loc.pos++;
/// \param c A pointer to the current char that the location has to point to
void incLocation(Location &loc, const char *c) {
// TODO: Handle the end of line with respect to the OS.
// increase the current position in the buffer with respect to the end
// of line.
auto newline = *c == '\n';
if (!newline) {
loc.col++;
} else {
loc.col = 0;
loc.line++;
}
loc.line++;
}
/// decrease the given location by one and set the line/col value in respect to
/// the `newline` in place.
/// \param loc The `Location` data
/// \param newline Whether or not we reached a new line
void decLocation(Location &loc, bool newline) {
loc.pos = loc.pos == 0 ? 0 : loc.pos - 1;
/// \param c A pointer to the current char that the location has to point to
void decLocation(Location &loc, const char *c) {
// TODO: Handle the end of line with respect to the OS.
// increase the current position in the buffer with respect to the end
// of line.
auto newline = *c == '\n';
if (newline) {
loc.line = loc.line == 0 ? 0 : loc.line - 1;
// We don't move back the `col` value because we simply don't know it
} else {
loc.col = loc.col == 0 ? 0 : loc.col - 1;

View File

@ -29,13 +29,16 @@
#include "serene/exprs/symbol.h"
#include "serene/namespace.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ErrorHandling.h"
#include <assert.h>
#include <cctype>
#include <fstream>
#include <llvm/Support/Error.h>
#include <llvm/Support/ErrorOr.h>
#include <llvm/Support/FormatVariadic.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/SMLoc.h>
#include <memory>
#include <string>
@ -43,44 +46,51 @@ namespace serene {
namespace reader {
Reader::Reader(const llvm::StringRef input) { this->setInput(input); };
Reader::Reader(SereneContext &ctx, llvm::StringRef buffer)
: ctx(ctx), buf(buffer){};
/// Set the input of the reader.
///\param input Set the input to the given string
void Reader::setInput(const llvm::StringRef input) {
current_location = Location::unit();
ast.clear();
input_stream.clear();
input_stream.write(input.str().c_str(), input.size());
};
Reader::Reader(SereneContext &ctx, llvm::MemoryBufferRef buffer)
: ctx(ctx), buf(buffer.getBuffer()){};
Reader::~Reader() { READER_LOG("Destroying the reader"); }
char Reader::getChar(bool skip_whitespace) {
const char *Reader::getChar(bool skip_whitespace) {
for (;;) {
char c = input_stream.get();
if (current_char == NULL) {
READER_LOG("Setting the first char of the buffer");
current_char = buf.begin();
current_pos = 1;
} else {
current_char++;
current_pos++;
}
this->current_char = c;
READER_LOG("Current Char: " << *current_char);
incLocation(current_location, current_char);
// TODO: Handle the end of line with respect to the OS.
// increase the current position in the buffer with respect to the end
// of line.
incLocation(current_location, c == '\n');
if (skip_whitespace == true && isspace(c)) {
if (skip_whitespace == true && isspace(*current_char)) {
READER_LOG("Skip whitespace is true and the char is a whitespace");
continue;
} else {
return c;
return current_char;
}
}
};
void Reader::ungetChar() {
input_stream.unget();
READER_LOG("Unread Char: " << *current_char);
current_char--;
current_pos--;
// The char that we just unget
decLocation(current_location, this->current_char == '\n');
decLocation(current_location, current_char);
};
bool Reader::isEndOfBuffer(const char *c) {
return *c == '\0' || current_pos > buf.size() || *c == EOF;
};
Location Reader::getCurrentLocation() { return current_location.clone(); };
/// A predicate function indicating whether the given char `c` is a valid
/// char for the starting point of a symbol or not.
bool Reader::isValidForIdentifier(char c) {
@ -106,8 +116,7 @@ bool Reader::isValidForIdentifier(char c) {
return true;
}
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9')) {
if (std::isalnum(c)) {
return true;
}
return false;
@ -122,31 +131,33 @@ exprs::Node Reader::readNumber(bool neg) {
bool empty = false;
LocationRange loc;
char c = getChar(false);
auto c = getChar(false);
loc.start = current_location;
loc.start = getCurrentLocation();
while (c != EOF &&
((!(isspace(c)) && ((c >= '0' && c <= '9') | (c == '.'))))) {
while (!isEndOfBuffer(c) &&
((!(isspace(*c)) && ((isdigit(*c)) | (*c == '.'))))) {
if (c == '.' && floatNum == true) {
llvm::errs() << "Two float points in a number?\n";
// TODO: Return a proper error
return nullptr;
if (*c == '.' && floatNum == true) {
ctx.sourceManager.PrintMessage(
llvm::errs(), llvm::SMLoc::getFromPointer(c),
ctx.sourceManager.DK_Error,
llvm::formatv("Two float points in a number?\n", c));
exit(1);
}
if (c == '.') {
if (*c == '.') {
floatNum = true;
}
number += c;
number += *c;
c = getChar(false);
empty = false;
}
if (!empty) {
ungetChar();
loc.end = current_location;
loc.end = getCurrentLocation();
return exprs::make<exprs::Number>(loc, number, neg, floatNum);
}
@ -156,73 +167,85 @@ exprs::Node Reader::readNumber(bool neg) {
/// Reads a symbol. If the symbol looks like a number (it starts with a digit,
/// or with `-` followed by a digit) it is read as a number instead by
/// delegating to `readNumber`.
///
/// NOTE(review): several statements below appear twice, once operating on a
/// `char` and once on a pointer that is dereferenced (`*c`). They look like
/// two revisions of the same function shown together -- confirm which set of
/// statements is the intended one.
exprs::Node Reader::readSymbol() {
READER_LOG("Reading a symbol...");
bool empty = true;
char c = getChar(false);
auto c = getChar(false);
READER_LOG("Reading a symbol...");
if (!this->isValidForIdentifier(c)) {
if (!this->isValidForIdentifier(*c)) {
// TODO: Replace this with a traceback function or something to raise
// a syntax error.
llvm::errs() << llvm::formatv(
"Invalid character at the start of a symbol: '{0}'\n", c);
// NOTE(review): in the pointer based variant `c` is passed to `formatv`
// without being dereferenced, unlike every other use (`*c`) -- verify that
// the message prints the offending character and not the rest of the buffer.
ctx.sourceManager.PrintMessage(
llvm::errs(), llvm::SMLoc::getFromPointer(c),
ctx.sourceManager.DK_Error,
llvm::formatv("Invalid character at the start of a symbol: '{0}'\n",
c));
exit(1);
}
// A leading `-` followed by a digit is a negative number, not a symbol.
if (c == '-') {
char next = getChar(false);
if (next >= '0' && next <= '9') {
ungetChar();
if (*c == '-') {
auto next = getChar(false);
ungetChar();
if (isdigit(*next)) {
return readNumber(true);
}
}
// A leading digit means the token is a (non negative) number.
if (c >= '0' && c <= '9') {
if (isdigit(*c)) {
ungetChar();
return readNumber(false);
}
std::string sym("");
LocationRange loc;
loc.start = current_location;
loc.start = getCurrentLocation();
// Accumulate characters until the input ends, a whitespace shows up or a
// character that is not valid for an identifier is found.
while (c != EOF && ((!(isspace(c)) && this->isValidForIdentifier(c)))) {
sym += c;
while (!isEndOfBuffer(c) &&
((!(isspace(*c)) && this->isValidForIdentifier(*c)))) {
sym += *c;
c = getChar(false);
empty = false;
}
if (!empty) {
// Give back the character that terminated the symbol.
ungetChar();
loc.end = current_location;
loc.end = getCurrentLocation();
return exprs::make<exprs::Symbol>(loc, sym);
}
// TODO: this should never happen
llvm_unreachable("Unpredicted symbol read scenario");
return nullptr;
};
/// Reads a list recursively
exprs::Node Reader::readList() {
READER_LOG("Reading a list...");
auto list = exprs::makeAndCast<exprs::List>(current_location);
auto list = exprs::makeAndCast<exprs::List>(getCurrentLocation());
char c = getChar(true);
auto c = getChar(true);
// TODO: Replace the assert with an actual check.
assert(c == '(');
assert(*c == '(');
bool list_terminated = false;
do {
char c = getChar(true);
auto c = getChar(true);
switch (c) {
case EOF:
throw ReadError(const_cast<char *>("EOF reached before closing of list"));
if (isEndOfBuffer(c)) {
ctx.sourceManager.PrintMessage(
llvm::errs(), llvm::SMLoc::getFromPointer(c),
ctx.sourceManager.DK_Error,
llvm::formatv("EOF reached before closing of list"));
exit(1);
}
switch (*c) {
case ')':
list_terminated = true;
list->location.end = current_location;
list->location.end = getCurrentLocation();
break;
default:
@ -237,18 +260,20 @@ exprs::Node Reader::readList() {
/// Reads an expression by dispatching to the proper reader function.
exprs::Node Reader::readExpr() {
char c = getChar(false);
READER_LOG("Read char at `readExpr`: " << c);
auto c = getChar(true);
READER_LOG("Read char at `readExpr`: " << *c << " << " << current_pos << "|"
<< buf.size() << " BB "
<< isEndOfBuffer(c));
ungetChar();
switch (c) {
case '(': {
if (isEndOfBuffer(c)) {
return nullptr;
}
switch (*c) {
case '(': {
return readList();
}
case EOF:
return nullptr;
default:
return readSymbol();
}
@ -258,50 +283,32 @@ exprs::Node Reader::readExpr() {
/// Each expression type (from the reader perspective) has a
/// reader function.
/// Read expressions one after another with `readExpr`, append every non-null
/// result to `this->ast` and stop at the first null result. The collected AST
/// is moved into a successful `Result`.
///
/// NOTE(review): a `while (c != EOF)` loop header and a `for` loop header are
/// both present below but only one closing brace follows; they look like two
/// revisions of the same function shown together -- confirm which one is the
/// intended one.
Result<exprs::Ast> Reader::read() {
char c = getChar(true);
// auto c = getChar(true);
while (c != EOF) {
ungetChar();
// while (!isEndOfBuffer(c)) {
// NOTE(review): `current_pos` is declared here as a loop local and the loop
// header has no increment expression, and the body shown here never assigns
// it; as written the loop is left through the `break` below. Verify whether
// the position counter used by the other reader functions was meant instead.
for (size_t current_pos = 0; current_pos < buf.size();) {
// ungetChar();
auto tmp{readExpr()};
if (tmp) {
this->ast.push_back(move(tmp));
} else {
break;
}
c = getChar(true);
// c = getChar(true);
}
return Result<exprs::Ast>::success(std::move(this->ast));
};
/// Reads all the expressions from the file provided via its path
// in the reader as an AST.
/// Each expression type (from the reader perspective) has a
/// reader function.
Result<exprs::Ast> FileReader::read() {
// TODO: Add support for relative path as well
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> fileOrErr =
llvm::MemoryBuffer::getFileOrSTDIN(file);
if (std::error_code EC = fileOrErr.getError()) {
llvm::errs() << "Could not open input file: " << EC.message() << "\n";
llvm::errs() << llvm::formatv("File: '{0}'\n", file);
llvm::errs() << "Use absolute path for now\n";
return Result<exprs::Ast>::error(llvm::make_error<MissingFileError>(file));
}
reader->setInput(fileOrErr.get()->getBuffer().str());
return reader->read();
Result<exprs::Ast> read(SereneContext &ctx, const llvm::StringRef input) {
reader::Reader r(ctx, input);
auto ast = r.read();
return ast;
}
FileReader::~FileReader() {
delete this->reader;
READER_LOG("Destroying the file reader");
}
Result<exprs::Ast> read(llvm::StringRef input) {
reader::Reader r(input);
Result<exprs::Ast> read(SereneContext &ctx, const llvm::MemoryBufferRef input) {
reader::Reader r(ctx, input);
auto ast = r.read();
return ast;
}

680
src/serene/source_mgr.cpp Normal file
View File

@ -0,0 +1,680 @@
/*
* Serene programming language.
*
* Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "serene/source_mgr.h"
#include "mlir/Support/LogicalResult.h"
#include "serene/namespace.h"
#include "serene/reader/reader.h"
#include "llvm/Support/MemoryBufferRef.h"
#include <llvm/Support/FormatVariadic.h>
#include <llvm/Support/Locale.h>
#include <llvm/Support/Path.h>
#include <system_error>
namespace serene {
/// Width, in columns, of a tab stop. Tabs in diagnostics output are expanded
/// to the next multiple of this value.
static constexpr size_t tabStop = 8;
std::string inline SourceMgr::convertNamespaceToPath(std::string ns_name) {
std::replace(ns_name.begin(), ns_name.end(), '.', '/');
llvm::SmallString<256> path;
path.append(ns_name);
llvm::sys::path::native(path);
return std::string(path);
};
/// Locate, load and parse the namespace \p name.
///
/// The namespace name is converted to a relative file name
/// (`<converted name>.<DEFAULT_SUFFIX>`) which is looked up under every entry
/// of `loadPaths`, in order. The first file that can be opened is registered
/// as a new source buffer, handed to the reader and the resulting AST is
/// attached to a freshly created namespace.
///
/// \param ctx       the context forwarded to the reader and `makeNamespace`.
/// \param name      the dotted name of the namespace to load.
/// \param importLoc location used for the diagnostics and recorded as the
///                  include location of the new buffer.
/// \param entryNS   not consulted by this implementation.
/// \return the new namespace, or `nullptr` when the file can't be found,
///         can't be added as a buffer, can't be read, or when the AST can't
///         be set on the namespace.
NSPtr SourceMgr::readNamespace(SereneContext &ctx, std::string name,
                               llvm::SMLoc importLoc, bool entryNS) {
  std::string includedFile;

  // File name of the namespace relative to a load path.
  const std::string relativeFile =
      convertNamespaceToPath(name) + "." + DEFAULT_SUFFIX;

  // Start from a "not found" state so an empty `loadPaths` is reported as a
  // missing namespace.
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> newBufOrErr(
      std::make_error_code(std::errc::no_such_file_or_directory));

  SMGR_LOG("Attempt to load namespace: " + name);

  // Try every load path in order and stop at the first file that opens.
  for (const auto &loadPath : loadPaths) {
    // Let llvm join the components so separators are neither duplicated nor
    // prepended to an empty load path.
    llvm::SmallString<256> candidate;
    llvm::sys::path::append(candidate, loadPath, relativeFile);
    includedFile = std::string(candidate);

    SMGR_LOG("Try to load the ns from: " + includedFile);
    newBufOrErr = llvm::MemoryBuffer::getFile(includedFile);

    if (newBufOrErr) {
      break;
    }
  }

  if (!newBufOrErr) {
    PrintMessage(importLoc, DiagKind::DK_Error,
                 llvm::formatv("Couldn't find namespace '{0}'", name));
    return nullptr;
  }

  auto bufferId = AddNewSourceBuffer(std::move(*newBufOrErr), importLoc);

  // Buffer ids start at 1, zero means the buffer was not added.
  if (bufferId == 0) {
    PrintMessage(importLoc, DiagKind::DK_Error,
                 llvm::formatv("Couldn't add namespace '{0}'", name));
    return nullptr;
  }

  // Since we moved the buffer to be added as the source storage we
  // need to get a pointer to it again
  auto *buf = getMemoryBuffer(bufferId);

  // Read the content of the buffer by passing it the reader
  auto maybeAst = reader::read(ctx, buf->getBuffer());

  if (!maybeAst) {
    SMGR_LOG("Couldn't Read namespace: " + name);
    return nullptr;
  }

  // Create the NS and set the AST
  auto ns =
      makeNamespace(ctx, name, llvm::Optional(llvm::StringRef(includedFile)));

  if (mlir::failed(ns->setTree(maybeAst.getValue()))) {
    SMGR_LOG("Couldn't set the AST for namespace: " + name);
    return nullptr;
  }

  return ns;
};
unsigned SourceMgr::AddIncludeFile(const std::string &filename,
llvm::SMLoc includeLoc,
std::string &includedFile) {
includedFile = filename;
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> NewBufOrErr =
llvm::MemoryBuffer::getFile(includedFile);
// If the file didn't exist directly, see if it's in an include path.
for (unsigned i = 0, e = loadPaths.size(); i != e && !NewBufOrErr; ++i) {
includedFile =
loadPaths[i] + llvm::sys::path::get_separator().data() + filename;
NewBufOrErr = llvm::MemoryBuffer::getFile(includedFile);
}
if (!NewBufOrErr)
return 0;
return AddNewSourceBuffer(std::move(*NewBufOrErr), includeLoc);
}
/// Find the buffer whose memory range contains \p loc.
///
/// \return the 1-based id of the first matching buffer, or 0 when no buffer
///         contains the location.
unsigned SourceMgr::FindBufferContainingLoc(llvm::SMLoc loc) const {
  const char *target = loc.getPointer();
  const unsigned count = buffers.size();

  for (unsigned idx = 0; idx != count; ++idx) {
    const auto &candidate = buffers[idx];

    if (target < candidate.buffer->getBufferStart()) {
      continue;
    }

    // The end pointer itself is accepted so that a pointer to the null at
    // the end of the buffer counts as part of the buffer.
    if (target <= candidate.buffer->getBufferEnd()) {
      return idx + 1;
    }
  }

  return 0;
}
template <typename T>
static std::vector<T> &GetOrCreateOffsetCache(void *&offsetCache,
llvm::MemoryBuffer *buffer) {
if (offsetCache)
return *static_cast<std::vector<T> *>(offsetCache);
// Lazily fill in the offset cache.
auto *offsets = new std::vector<T>();
size_t sz = buffer->getBufferSize();
assert(sz <= std::numeric_limits<T>::max());
llvm::StringRef s = buffer->getBuffer();
for (size_t n = 0; n < sz; ++n) {
if (s[n] == '\n')
offsets->push_back(static_cast<T>(n));
}
offsetCache = offsets;
return *offsets;
}
/// Compute the 1-based line number of \p ptr using an offset cache whose
/// entries are of type \p T.
///
/// \p ptr has to point into the buffer (its end included) and its offset has
/// to be representable as \p T; both are asserted.
template <typename T>
unsigned SourceMgr::SrcBuffer::getLineNumberSpecialized(const char *ptr) const {
  std::vector<T> &lineEnds =
      GetOrCreateOffsetCache<T>(offsetCache, buffer.get());

  const char *bufStart = buffer->getBufferStart();
  assert(ptr >= bufStart && ptr <= buffer->getBufferEnd());

  const ptrdiff_t distance = ptr - bufStart;
  assert(distance >= 0 &&
         static_cast<size_t>(distance) <= std::numeric_limits<T>::max());
  const T target = static_cast<T>(distance);

  // The number of newline offsets below `target` is the number of line ends
  // before the pointer; one more than that is the line number.
  const auto firstNotBefore = llvm::lower_bound(lineEnds, target);
  return firstNotBefore - lineEnds.begin() + 1;
}
/// Look up the given \p ptr in the buffer and determine which line it came
/// from. The lookup is dispatched on the size of the buffer so the smallest
/// unsigned offset type that can hold every offset is used.
unsigned SourceMgr::SrcBuffer::getLineNumber(const char *ptr) const {
  const size_t length = buffer->getBufferSize();

  if (length <= std::numeric_limits<uint8_t>::max()) {
    return getLineNumberSpecialized<uint8_t>(ptr);
  }

  if (length <= std::numeric_limits<uint16_t>::max()) {
    return getLineNumberSpecialized<uint16_t>(ptr);
  }

  if (length <= std::numeric_limits<uint32_t>::max()) {
    return getLineNumberSpecialized<uint32_t>(ptr);
  }

  return getLineNumberSpecialized<uint64_t>(ptr);
}
/// Return a pointer to the first character of line \p lineNo using an offset
/// cache whose entries are of type \p T.
///
/// Line numbers 0 and 1 both map to the start of the buffer. `nullptr` is
/// returned when the buffer has fewer lines than requested.
template <typename T>
const char *SourceMgr::SrcBuffer::getPointerForLineNumberSpecialized(
    unsigned lineNo) const {
  std::vector<T> &lineEnds =
      GetOrCreateOffsetCache<T>(offsetCache, buffer.get());
  const char *bufStart = buffer->getBufferStart();

  // We start counting line and column numbers from 1, and the first line
  // begins where the buffer begins.
  if (lineNo <= 1) {
    return bufStart;
  }

  // The cache holds the position of the '\n' that ends each line; the wanted
  // line starts right after the '\n' of the line before it.
  const unsigned precedingLines = lineNo - 1;
  if (precedingLines > lineEnds.size()) {
    return nullptr;
  }

  return bufStart + lineEnds[precedingLines - 1] + 1;
}
/// Return a pointer to the first character of the specified line number or
/// null if the line number is invalid. The lookup is dispatched on the size
/// of the buffer so the matching offset cache type is used.
const char *
SourceMgr::SrcBuffer::getPointerForLineNumber(unsigned lineNo) const {
  const size_t length = buffer->getBufferSize();

  if (length <= std::numeric_limits<uint8_t>::max()) {
    return getPointerForLineNumberSpecialized<uint8_t>(lineNo);
  }

  if (length <= std::numeric_limits<uint16_t>::max()) {
    return getPointerForLineNumberSpecialized<uint16_t>(lineNo);
  }

  if (length <= std::numeric_limits<uint32_t>::max()) {
    return getPointerForLineNumberSpecialized<uint32_t>(lineNo);
  }

  return getPointerForLineNumberSpecialized<uint64_t>(lineNo);
}
/// Move constructor: takes over the buffer and the offset cache of \p other
/// and copies its include location. The cache pointer of \p other is reset so
/// the cache is referenced by the new object only.
SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&other)
    : buffer(std::move(other.buffer)),
      offsetCache(std::exchange(other.offsetCache, nullptr)),
      includeLoc(other.includeLoc) {}
/// Release the offset cache, if one was created. The cache is stored behind
/// a `void *`, so the size of the buffer is used to recover the element type
/// the cache was created with before deleting it.
SourceMgr::SrcBuffer::~SrcBuffer() {
  if (offsetCache == nullptr) {
    return;
  }

  const size_t length = buffer->getBufferSize();

  if (length <= std::numeric_limits<uint8_t>::max()) {
    delete static_cast<std::vector<uint8_t> *>(offsetCache);
  } else if (length <= std::numeric_limits<uint16_t>::max()) {
    delete static_cast<std::vector<uint16_t> *>(offsetCache);
  } else if (length <= std::numeric_limits<uint32_t>::max()) {
    delete static_cast<std::vector<uint32_t> *>(offsetCache);
  } else {
    delete static_cast<std::vector<uint64_t> *>(offsetCache);
  }

  offsetCache = nullptr;
}
/// Compute the 1-based line and column of \p loc.
///
/// \param loc      the location to look up.
/// \param bufferID id of the buffer containing \p loc; when 0 the buffer is
///                 searched for.
/// \return a (line, column) pair.
std::pair<unsigned, unsigned>
SourceMgr::getLineAndColumn(llvm::SMLoc loc, unsigned bufferID) const {
  if (bufferID == 0) {
    bufferID = FindBufferContainingLoc(loc);
  }
  assert(bufferID && "Invalid location!");

  auto &srcBuf = getBufferInfo(bufferID);
  const char *target = loc.getPointer();
  const unsigned line = srcBuf.getLineNumber(target);

  const char *bufStart = srcBuf.buffer->getBufferStart();
  const size_t offset = target - bufStart;
  const size_t lastBreak =
      llvm::StringRef(bufStart, offset).find_last_of("\n\r");

  // Without a line break before the location the column is the offset from
  // the start of the buffer plus one; otherwise it is the distance from the
  // last line break.
  const size_t column = (lastBreak == llvm::StringRef::npos)
                            ? offset + 1
                            : offset - lastBreak;

  return std::make_pair(line, column);
}
// FIXME: Note that the formatting of source locations is spread between
// multiple functions, some in SourceMgr and some in SMDiagnostic. A better
// solution would be a general-purpose source location formatter
// in one of those two classes, or possibly in llvm::SMLoc.

/// Get a string with the source location formatted in the standard
/// style, but without the line offset.
///
/// \param loc         the location to format; it has to belong to one of the
///                    registered buffers (asserted).
/// \param includePath when true the whole buffer identifier is used, when
///                    false only what follows its last `/` or `\`.
/// \return `<file>:<line>`.
std::string SourceMgr::getFormattedLocationNoOffset(llvm::SMLoc loc,
                                                    bool includePath) const {
  auto bufferID = FindBufferContainingLoc(loc);
  assert(bufferID && "Invalid location!");
  auto fileSpec = getBufferInfo(bufferID).buffer->getBufferIdentifier();

  if (!includePath) {
    // `find_last_of` reports "no separator" as `npos`, never as `size()`, so
    // test for `npos` explicitly instead of relying on `npos + 1` wrapping
    // around to 0.
    const auto lastSeparator = fileSpec.find_last_of("/\\");
    const auto nameStart =
        (lastSeparator == llvm::StringRef::npos) ? 0 : (lastSeparator + 1);
    fileSpec = fileSpec.substr(nameStart);
  }

  return fileSpec.str() + ":" + std::to_string(FindLineNumber(loc, bufferID));
}
/// Given a line and column number in a mapped buffer, turn it into an
/// llvm::SMLoc. This will return a null llvm::SMLoc if the line/column location
/// is invalid. Column numbers 0 and 1 both denote the first character of the
/// line.
llvm::SMLoc SourceMgr::FindLocForLineAndColumn(unsigned bufferID,
                                               unsigned lineNo,
                                               unsigned colNo) {
  auto &srcBuf = getBufferInfo(bufferID);
  const char *lineStart = srcBuf.getPointerForLineNumber(lineNo);
  if (lineStart == nullptr) {
    return llvm::SMLoc();
  }

  // We start counting line and column numbers from 1.
  const unsigned offset = (colNo == 0) ? 0 : colNo - 1;

  // Nothing to validate when the location is the start of the line.
  if (offset == 0) {
    return llvm::SMLoc::getFromPointer(lineStart);
  }

  // Make sure the location is within the buffer.
  if (lineStart + offset > srcBuf.buffer->getBufferEnd()) {
    return llvm::SMLoc();
  }

  // Make sure there is no newline in the way.
  const llvm::StringRef skipped(lineStart, offset);
  if (skipped.find_first_of("\n\r") != llvm::StringRef::npos) {
    return llvm::SMLoc();
  }

  return llvm::SMLoc::getFromPointer(lineStart + offset);
}
/// Print the chain of "Included from" lines leading to \p includeLoc on
/// \p os. The chain is walked recursively first, so the outermost include is
/// printed first. Nothing is printed for a default constructed location.
void SourceMgr::PrintIncludeStack(llvm::SMLoc includeLoc,
                                  llvm::raw_ostream &os) const {
  const bool topOfStack = (includeLoc == llvm::SMLoc());
  if (topOfStack) {
    return;
  }

  const unsigned bufID = FindBufferContainingLoc(includeLoc);
  assert(bufID && "Invalid or unspecified location!");

  // Print the includers of this buffer before the buffer itself.
  PrintIncludeStack(getBufferInfo(bufID).includeLoc, os);

  os << "Included from ";
  os << getBufferInfo(bufID).buffer->getBufferIdentifier();
  os << ":" << FindLineNumber(includeLoc, bufID) << ":\n";
}
/// Build the diagnostic for \p msg at \p loc.
///
/// For a valid location the source line around it is extracted, \p ranges
/// are clipped to that line and turned into column ranges, and the line and
/// column of the location are computed. For an invalid location the
/// diagnostic carries the `<unknown>` buffer name and an empty source line.
SMDiagnostic SourceMgr::GetMessage(llvm::SMLoc loc, SourceMgr::DiagKind kind,
                                   const llvm::Twine &msg,
                                   llvm::ArrayRef<llvm::SMRange> ranges,
                                   llvm::ArrayRef<llvm::SMFixIt> fixIts) const {
  llvm::SmallVector<std::pair<unsigned, unsigned>, 4> columnSpans;
  std::pair<unsigned, unsigned> position;
  llvm::StringRef bufferName = "<unknown>";
  llvm::StringRef sourceLine;

  if (loc.isValid()) {
    // Find the buffer containing the location to pull out the source line.
    const unsigned bufID = FindBufferContainingLoc(loc);
    assert(bufID && "Invalid or unspecified location!");

    const llvm::MemoryBuffer *memBuf = getMemoryBuffer(bufID);
    bufferName = memBuf->getBufferIdentifier();

    const char *const bufBegin = memBuf->getBufferStart();
    const char *const bufEnd = memBuf->getBufferEnd();
    const auto isLineBreak = [](char ch) { return ch == '\n' || ch == '\r'; };

    // Walk backwards to the start of the line...
    const char *lineBegin = loc.getPointer();
    while (lineBegin != bufBegin && !isLineBreak(lineBegin[-1])) {
      --lineBegin;
    }

    // ... and forwards to its end.
    const char *lineEnd = loc.getPointer();
    while (lineEnd != bufEnd && !isLineBreak(lineEnd[0])) {
      ++lineEnd;
    }

    sourceLine = llvm::StringRef(lineBegin, lineEnd - lineBegin);

    // Keep only the parts of the ranges that intersect the line of the
    // location and express them as columns.
    for (const llvm::SMRange &range : ranges) {
      if (!range.isValid()) {
        continue;
      }

      const char *spanBegin = range.Start.getPointer();
      const char *spanEnd = range.End.getPointer();

      // A range that lies entirely on other lines is dropped.
      if (spanBegin > lineEnd || spanEnd < lineBegin) {
        continue;
      }

      // Pieces that reach onto other lines are cut off.
      if (spanBegin < lineBegin) {
        spanBegin = lineBegin;
      }
      if (spanEnd > lineEnd) {
        spanEnd = lineEnd;
      }

      // FIXME: Handle multibyte characters.
      columnSpans.push_back(
          std::make_pair(spanBegin - lineBegin, spanEnd - lineBegin));
    }

    position = getLineAndColumn(loc, bufID);
  }

  return SMDiagnostic(*this, loc, bufferName, position.first,
                      position.second - 1, kind, msg.str(), sourceLine,
                      columnSpans, fixIts);
}
/// Emit \p diagnostic.
///
/// When a diagnostic handler is installed the diagnostic is handed to it and
/// nothing is written to \p os. Otherwise the include stack of the buffer
/// containing the location (if the location is valid) is printed, followed
/// by the diagnostic itself.
void SourceMgr::PrintMessage(llvm::raw_ostream &os,
                             const SMDiagnostic &diagnostic,
                             bool showColors) const {
  // Report the message with the diagnostic handler if present.
  if (diagHandler) {
    diagHandler(diagnostic, diagContext);
    return;
  }

  const bool hasLocation = diagnostic.getLoc().isValid();
  if (hasLocation) {
    const unsigned bufID = FindBufferContainingLoc(diagnostic.getLoc());
    assert(bufID && "Invalid or unspecified location!");
    PrintIncludeStack(getBufferInfo(bufID).includeLoc, os);
  }

  diagnostic.print(nullptr, os, showColors);
}
/// Build the diagnostic for \p msg at \p loc with `GetMessage` and print it
/// on \p os.
void SourceMgr::PrintMessage(llvm::raw_ostream &os, llvm::SMLoc loc,
                             SourceMgr::DiagKind kind, const llvm::Twine &msg,
                             llvm::ArrayRef<llvm::SMRange> ranges,
                             llvm::ArrayRef<llvm::SMFixIt> fixIts,
                             bool showColors) const {
  const SMDiagnostic diagnostic = GetMessage(loc, kind, msg, ranges, fixIts);
  PrintMessage(os, diagnostic, showColors);
}
/// Convenience overload which prints the diagnostic on `llvm::errs()`.
void SourceMgr::PrintMessage(llvm::SMLoc loc, SourceMgr::DiagKind kind,
                             const llvm::Twine &msg,
                             llvm::ArrayRef<llvm::SMRange> ranges,
                             llvm::ArrayRef<llvm::SMFixIt> fixIts,
                             bool showColors) const {
  llvm::raw_ostream &errorStream = llvm::errs();
  PrintMessage(errorStream, loc, kind, msg, ranges, fixIts, showColors);
}
//===----------------------------------------------------------------------===//
// SMDiagnostic Implementation
//===----------------------------------------------------------------------===//
/// Store the pieces of a diagnostic. The fix-its are copied from \p hints
/// and kept sorted.
SMDiagnostic::SMDiagnostic(const SourceMgr &sm, llvm::SMLoc l,
                           llvm::StringRef fn, int line, int col,
                           SourceMgr::DiagKind kind, llvm::StringRef msg,
                           llvm::StringRef lineStr,
                           llvm::ArrayRef<std::pair<unsigned, unsigned>> ranges,
                           llvm::ArrayRef<llvm::SMFixIt> hints)
    : sm(&sm),
      loc(l),
      filename(std::string(fn)),
      lineNo(line),
      columnNo(col),
      kind(kind),
      message(msg),
      lineContents(lineStr),
      ranges(ranges.vec()),
      fixIts(hints.begin(), hints.end()) {
  llvm::sort(fixIts.begin(), fixIts.end());
}
/// Render the fix-it hints that touch \p sourceLine.
///
/// The text of every applicable hint is written into \p fixItLine at the
/// column it applies to, and the columns the hint replaces are marked with
/// `~` in \p caretLine. Hints containing a newline, carriage return or tab
/// and hints which don't intersect the line are skipped.
static void buildFixItLine(std::string &caretLine, std::string &fixItLine,
                           llvm::ArrayRef<llvm::SMFixIt> fixIts,
                           llvm::ArrayRef<char> sourceLine) {
  if (fixIts.empty()) {
    return;
  }

  const char *const lineBegin = sourceLine.begin();
  const char *const lineEnd = sourceLine.end();

  size_t previousHintEnd = 0;
  for (const llvm::SMFixIt &hint : fixIts) {
    const llvm::StringRef text = hint.getText();

    // If the fixit contains a newline or tab, ignore it.
    if (text.find_first_of("\n\r\t") != llvm::StringRef::npos) {
      continue;
    }

    const llvm::SMRange span = hint.getRange();
    const char *const spanBegin = span.Start.getPointer();
    const char *const spanEnd = span.End.getPointer();

    // If the line doesn't contain any part of the range, then ignore it.
    if (spanBegin > lineEnd || spanEnd < lineBegin) {
      continue;
    }

    // Translate the start of the range to a column, ignoring the piece of
    // the range that lies on a previous line.
    // FIXME: Handle multibyte characters in the source line.
    unsigned firstCol = 0;
    if (spanBegin >= lineBegin) {
      firstCol = spanBegin - lineBegin;
    }

    // If a long previous hint reaches past the column of this one, push this
    // one forwards and leave one space in between to show that it is not
    // part of the previous hint. A hint located immediately after the
    // previous one gets no extra space, since the location is more important.
    unsigned hintCol = firstCol;
    if (hintCol < previousHintEnd) {
      hintCol = previousHintEnd + 1;
    }

    // FIXME: This assertion is intended to catch unintended use of multibyte
    // characters in fixits. If we decide to do this, we'll have to track
    // separate byte widths for the source and fixit lines.
    assert((size_t)llvm::sys::locale::columnWidth(text) == text.size());

    // This relies on one byte per column in our fixit hints.
    const unsigned lastColumnModified = hintCol + text.size();
    if (lastColumnModified > fixItLine.size()) {
      fixItLine.resize(lastColumnModified, ' ');
    }

    llvm::copy(text, fixItLine.begin() + hintCol);
    previousHintEnd = lastColumnModified;

    // For replacements, mark the removal range with '~'. The range is cut
    // off at the end of the line.
    // FIXME: Handle multibyte characters in the source line.
    unsigned lastCol = lineEnd - lineBegin;
    if (spanEnd < lineEnd) {
      lastCol = spanEnd - lineBegin;
    }

    std::fill(&caretLine[firstCol], &caretLine[lastCol], '~');
  }
}
/// Print \p lineContents on \p s followed by a newline, replacing every tab
/// with spaces up to the next tab stop (at least one space per tab).
static void printSourceLine(llvm::raw_ostream &s,
                            llvm::StringRef lineContents) {
  const unsigned length = lineContents.size();
  unsigned cursor = 0;
  unsigned column = 0;

  while (cursor != length) {
    const size_t tabPos = lineContents.find('\t', cursor);

    // No tab left: the remainder can be printed as is.
    if (tabPos == llvm::StringRef::npos) {
      s << lineContents.drop_front(cursor);
      break;
    }

    // Print everything up to the tab.
    s << lineContents.slice(cursor, tabPos);
    column += tabPos - cursor;

    // Expand the tab: emit at least one space, then pad to the tab stop.
    do {
      s << ' ';
      ++column;
    } while ((column % tabStop) != 0);

    // Continue right after the tab.
    cursor = tabPos + 1;
  }

  s << '\n';
}
/// Tell whether the most significant bit of \p c is set, i.e. whether the
/// byte is outside of the 7 bit ASCII range.
static bool isNonASCII(char c) {
  const unsigned char byte = static_cast<unsigned char>(c);
  return (byte & 0x80U) != 0;
}
/// Write this diagnostic to \p os.
///
/// The output consists of a header (`<progName>: <file>:<line>:<column>: `,
/// each part only when available), an optional label for the kind of the
/// diagnostic, the message, and -- when both a line and a column are known --
/// the source line followed by a caret line and, if there are fix-its, a
/// line with the replacement text.
///
/// \param progName      optional program name printed first; skipped when
///                      null or empty.
/// \param os            the stream to write to.
/// \param showColors    when false colors are disabled, otherwise the color
///                      mode is `Auto`.
/// \param showKindLabel whether to print the error/warning/note/remark label.
void SMDiagnostic::print(const char *progName, llvm::raw_ostream &os,
bool showColors, bool showKindLabel) const {
llvm::ColorMode mode =
showColors ? llvm::ColorMode::Auto : llvm::ColorMode::Disable;
{
// Header: program name and location. The color object is scoped so it is
// destroyed before the kind label is printed.
llvm::WithColor s(os, llvm::raw_ostream::SAVEDCOLOR, true, false, mode);
if (progName && progName[0])
s << progName << ": ";
if (!filename.empty()) {
// A buffer named "-" is reported as standard input.
if (filename == "-")
s << "<stdin>";
else
s << filename;
if (lineNo != -1) {
s << ':' << lineNo;
// The stored column is printed incremented by one.
if (columnNo != -1)
s << ':' << (columnNo + 1);
}
s << ": ";
}
}
if (showKindLabel) {
switch (kind) {
case SourceMgr::DK_Error:
llvm::WithColor::error(os, "", !showColors);
break;
case SourceMgr::DK_Warning:
llvm::WithColor::warning(os, "", !showColors);
break;
case SourceMgr::DK_Note:
llvm::WithColor::note(os, "", !showColors);
break;
case SourceMgr::DK_Remark:
llvm::WithColor::remark(os, "", !showColors);
break;
}
}
llvm::WithColor(os, llvm::raw_ostream::SAVEDCOLOR, true, false, mode)
<< message << '\n';
// Without a line and a column there is no source line to show.
if (lineNo == -1 || columnNo == -1)
return;
// FIXME: If there are multibyte or multi-column characters in the source, all
// our ranges will be wrong. To do this properly, we'll need a byte-to-column
// map like Clang's TextDiagnostic. For now, we'll just handle tabs by
// expanding them later, and bail out rather than show incorrect ranges and
// misaligned fixits for any other odd characters.
if (llvm::any_of(lineContents, isNonASCII)) {
printSourceLine(os, lineContents);
return;
}
size_t numColumns = lineContents.size();
// Build the line with the caret and ranges. It is one column longer than the
// source line so the caret can be placed right after the last character.
std::string caretLine(numColumns + 1, ' ');
// Expand any ranges.
for (const std::pair<unsigned, unsigned> &r : ranges)
std::fill(&caretLine[r.first],
&caretLine[std::min((size_t)r.second, caretLine.size())], '~');
// Add any fix-its.
// FIXME: Find the beginning of the line properly for multibyte characters.
std::string fixItInsertionLine;
buildFixItLine(
caretLine, fixItInsertionLine, fixIts,
llvm::makeArrayRef(loc.getPointer() - columnNo, lineContents.size()));
// Finally, plop on the caret. A column past the end of the line is clamped
// to the extra column at the end of the caret line.
if (unsigned(columnNo) <= numColumns)
caretLine[columnNo] = '^';
else
caretLine[numColumns] = '^';
// ... and remove trailing whitespace so the output doesn't wrap for it. We
// know that the line isn't completely empty because it has the caret in it at
// least.
caretLine.erase(caretLine.find_last_not_of(' ') + 1);
printSourceLine(os, lineContents);
{
// NOTE(review): this `mode` shadows the variable of the same name declared
// at the top of the function and is initialized from the same expression.
llvm::ColorMode mode =
showColors ? llvm::ColorMode::Auto : llvm::ColorMode::Disable;
llvm::WithColor s(os, llvm::raw_ostream::GREEN, true, false, mode);
// Print out the caret line, matching tabs in the source line.
for (unsigned i = 0, e = caretLine.size(), outCol = 0; i != e; ++i) {
if (i >= lineContents.size() || lineContents[i] != '\t') {
s << caretLine[i];
++outCol;
continue;
}
// Okay, we have a tab. Insert the appropriate number of characters: the
// caret line character at this position is repeated up to the tab stop.
do {
s << caretLine[i];
++outCol;
} while ((outCol % tabStop) != 0);
}
s << '\n';
}
// Print out the replacement line, matching tabs in the source line.
if (fixItInsertionLine.empty())
return;
for (size_t i = 0, e = fixItInsertionLine.size(), outCol = 0; i < e; ++i) {
if (i >= lineContents.size() || lineContents[i] != '\t') {
os << fixItInsertionLine[i];
++outCol;
continue;
}
// Okay, we have a tab. Insert the appropriate number of characters.
do {
os << fixItInsertionLine[i];
// FIXME: This is trying not to break up replacements, but then to re-sync
// with the tabs between replacements. This will fail, though, if two
// fix-it replacements are exactly adjacent, or if a fix-it contains a
// space. Really we should be precomputing column widths, which we'll
// need anyway for multibyte chars.
// Note that `i` is advanced inside this inner loop as well, which is why
// the loop condition also checks for the end of the line.
if (fixItInsertionLine[i] != ' ')
++i;
++outCol;
} while (((outCol % tabStop) != 0) && i != e);
}
os << '\n';
}
}; // namespace serene