/* -*- C++ -*- * Serene Programming Language * * Copyright (c) 2019-2022 Sameer Rahmani * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, version 2. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** * Commentary: * `Reader` is the base parser class and accepts a buffer-like object (usually * `llvm::StringRef`) as the input and parses it to create an AST (look at the * `serene::exprs::Expression` class). * * The parsing algorithm is quite simple and it is an LL(2) parser. This means that we * start parsing the input from the very first character and parse the input * one char at a time till we reach the end of the input. Please note that * when we call the `advance` function to move forward in the buffer, we * can't go back. In order to look ahead in the buffer without moving in the * buffer we use the `nextChar` method. * * We have dedicated methods to read different forms like `list`, `symbol`, * `number`, etc. Each of them returns a `MaybeNode` that contains the node * on success and an `Error` on failure. */ #ifndef SERENE_READER_READER_H #define SERENE_READER_READER_H #include "serene/errors.h" #include "serene/exprs/expression.h" #include "serene/exprs/list.h" #include "serene/exprs/symbol.h" #include "serene/reader/location.h" #include "serene/serene.h" #include #include #include #include #include #include #include #include #include #include #include #define READER_LOG(...) 
\ DEBUG_WITH_TYPE("READER", llvm::dbgs() \ << "[READER]: " << __VA_ARGS__ << "\n"); namespace serene::reader { /// Base reader class which reads from a string directly. class Reader { private: SereneContext &ctx; llvm::StringRef ns; llvm::Optional filename; const char *currentChar = NULL; llvm::StringRef buf; /// The position tracker that we will use to determine the end of the /// buffer since the buffer might not be null terminated size_t currentPos = -1; Location currentLocation; bool readEOL = false; /// Returns a clone of the current location Location getCurrentLocation(); /// Returns the next character from the stream. /// @param skip_whitespace An indicator to whether skip white space like chars /// or not void advance(bool skipWhitespace = false); void advanceByOne(); const char *nextChar(bool skipWhitespace = false, unsigned count = 1); /// Returns a boolean indicating whether the given input character is valid /// for an identifier or not. static bool isValidForIdentifier(char c); // The property to store the ast tree exprs::Ast ast; exprs::MaybeNode readSymbol(); exprs::MaybeNode readNumber(bool); exprs::MaybeNode readList(); exprs::MaybeNode readExpr(); bool isEndOfBuffer(const char *); public: Reader(SereneContext &ctx, llvm::StringRef buf, llvm::StringRef ns, llvm::Optional filename); Reader(SereneContext &ctx, llvm::MemoryBufferRef buf, llvm::StringRef ns, llvm::Optional filename); // void setInput(const llvm::StringRef string); /// Parses the the input and creates a possible AST out of it or errors /// otherwise. exprs::MaybeAst read(); ~Reader(); }; /// Parses the given `input` string and returns a `Result` /// which may contains an AST or an `llvm::Error` SERENE_EXPORT exprs::MaybeAst read(SereneContext &ctx, llvm::StringRef input, llvm::StringRef ns, llvm::Optional filename); SERENE_EXPORT exprs::MaybeAst read(SereneContext &ctx, llvm::MemoryBufferRef input, llvm::StringRef ns, llvm::Optional filename); } // namespace serene::reader #endif