2020-07-16 15:48:06 +01:00
|
|
|
/**
|
|
|
|
* Serene programming language.
|
|
|
|
*
|
|
|
|
* Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
|
|
* in the Software without restriction, including without limitation the rights
|
|
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
|
|
* furnished to do so, subject to the following conditions:
|
|
|
|
*
|
2020-07-23 21:47:13 +01:00
|
|
|
* The above copyright notice and this permission notice shall be included in
|
|
|
|
* all copies or substantial portions of the Software.
|
2020-07-16 15:48:06 +01:00
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
* SOFTWARE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "serene/reader.hpp"
|
2021-03-22 19:53:10 +00:00
|
|
|
#include "serene/error.hpp"
|
2020-07-16 15:48:06 +01:00
|
|
|
#include "serene/list.hpp"
|
2020-07-23 21:47:13 +01:00
|
|
|
#include "serene/symbol.hpp"
|
|
|
|
#include <assert.h>
|
2021-03-22 19:53:10 +00:00
|
|
|
#include <fstream>
|
2020-07-23 21:47:13 +01:00
|
|
|
#include <iostream>
|
|
|
|
#include <memory>
|
|
|
|
#include <string>
|
2020-07-16 15:48:06 +01:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
namespace serene {
|
2021-03-22 19:53:10 +00:00
|
|
|
/**
 * Construct a reader and hand `input` to setInput().
 *
 * @param input source text to be read.
 */
Reader::Reader(const string input) {
  setInput(input);
}
|
|
|
|
|
|
|
|
void Reader::setInput(const string input) {
|
2020-07-23 21:47:13 +01:00
|
|
|
input_stream.write(input.c_str(), input.size());
|
|
|
|
};
|
|
|
|
|
|
|
|
/** Destructor; only emits a log line. */
Reader::~Reader() {
  READER_LOG("Destroying the reader");
}
|
|
|
|
|
2020-08-02 21:08:35 +01:00
|
|
|
char Reader::get_char(bool skip_whitespace) {
|
2020-07-23 21:47:13 +01:00
|
|
|
for (;;) {
|
|
|
|
char c = input_stream.get();
|
|
|
|
if (skip_whitespace == true && isspace(c)) {
|
|
|
|
continue;
|
|
|
|
} else {
|
|
|
|
return c;
|
|
|
|
}
|
2020-07-21 22:23:11 +01:00
|
|
|
}
|
2020-07-23 21:47:13 +01:00
|
|
|
};
|
2020-07-21 22:23:11 +01:00
|
|
|
|
2020-07-23 21:47:13 +01:00
|
|
|
/** Ask `input_stream` to put back the most recently extracted character. */
void Reader::unget_char() {
  input_stream.unget();
}
|
2020-07-16 15:48:06 +01:00
|
|
|
|
2020-07-23 21:47:13 +01:00
|
|
|
/**
 * Tell whether `c` may appear in a symbol name.
 *
 * Accepted characters are ASCII letters, ASCII digits and the punctuation
 * listed in the switch below.
 *
 * @param c character to classify.
 * @return true when `c` is allowed inside an identifier, false otherwise.
 */
bool Reader::is_valid_for_identifier(char c) {
  // Each character needs its own case label. Joining them with `|` folds the
  // whole list into one integer constant (0x7F), which matches none of the
  // intended characters.
  switch (c) {
  case '!':
  case '$':
  case '%':
  case '&':
  case '*':
  case '+':
  case '-':
  case '.':
  case '~':
  case '/':
  case ':':
  case '<':
  case '=':
  case '>':
  case '?':
  case '@':
  case '^':
  case '_':
    return true;
  default:
    break;
  }

  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
         (c >= '0' && c <= '9');
}
|
2020-07-16 15:48:06 +01:00
|
|
|
|
2020-07-23 21:47:13 +01:00
|
|
|
/**
 * Read one symbol starting at the current stream position.
 *
 * Characters are collected for as long as is_valid_for_identifier() accepts
 * them. The first rejected character is pushed back with unget_char() so
 * the caller sees it again.
 *
 * If the very first character is not valid for an identifier, a message is
 * printed and the process terminates with exit code 1.
 *
 * @return a node holding a `Symbol` built from the collected characters.
 */
ast_node Reader::read_symbol() {
  char c = get_char(false);

  READER_LOG("Reading symbol");
  if (!this->is_valid_for_identifier(c)) {
    // TODO: Replace this with a traceback function or something to raise
    // a syntax error.
    fmt::print("Invalid character at the start of a symbol: '{}'\n", c);
    exit(1);
  }

  // `c` is known to be valid here, so at least one character is always
  // collected. is_valid_for_identifier() rejects whitespace as well as the
  // value get_char() returns at end of input, so no separate `isspace` or
  // EOF test is needed. This also avoids comparing a `char` against the
  // `int` EOF, which never matches where `char` is unsigned.
  std::string sym;
  do {
    sym += c;
    c = get_char(false);
  } while (this->is_valid_for_identifier(c));

  // Give back the character that ended the symbol.
  unget_char();
  return make_unique<Symbol>(sym);
}
|
2020-07-16 15:48:06 +01:00
|
|
|
|
2020-07-23 21:47:13 +01:00
|
|
|
/**
 * Read a parenthesised list into `list`.
 *
 * Expects the next non-whitespace character to be '(' (checked with
 * `assert`). Each element is read with read_expr() and appended to the list
 * until the matching ')' is found.
 *
 * @param list heap-allocated list to fill. Ownership is taken immediately,
 *             so the object is released even when reading fails.
 * @return the filled list wrapped in an owning pointer.
 * @throws ReadError when the input ends before the closing ')'.
 */
ast_list_node Reader::read_list(List *list) {
  // Own the list from the start. It used to be wrapped only at the final
  // return, so it leaked whenever the EOF error below was thrown.
  std::unique_ptr<List> owned(list);

  // get_char() reports end of input as EOF narrowed to `char`. Compare
  // against that same narrowed value so the test also works on platforms
  // where `char` is unsigned.
  const char eof_char = static_cast<char>(EOF);

  const char opening = get_char(true);
  assert(opening == '(');
  (void)opening; // keeps NDEBUG builds free of unused-variable warnings

  for (;;) {
    const char next = get_char(true);

    if (next == eof_char) {
      throw ReadError(const_cast<char *>("EOF reached before closing of list"));
    }

    if (next == ')') {
      break;
    }

    // Not a terminator: hand the character back and parse a full expression.
    unget_char();
    owned->append(read_expr());
  }

  return owned;
}
|
2020-07-16 15:48:06 +01:00
|
|
|
|
2020-07-23 21:47:13 +01:00
|
|
|
/**
 * Read a single expression.
 *
 * Looks at the next character (without skipping whitespace), puts it back,
 * and dispatches on it:
 *   - '('            -> read_list() with a freshly allocated `List`
 *   - end of input   -> nullptr
 *   - anything else  -> read_symbol()
 *
 * @return the parsed node, or nullptr at end of input.
 */
ast_node Reader::read_expr() {
  // get_char() reports end of input as EOF narrowed to `char`. Compare
  // against that same narrowed value so the test also works on platforms
  // where `char` is unsigned.
  const char eof_char = static_cast<char>(EOF);

  char first = get_char(false);
  READER_LOG("CHAR: {}", first);

  // Only peeking: the chosen sub-reader consumes the character itself.
  unget_char();

  if (first == '(') {
    return read_list(new List());
  }

  if (first == eof_char) {
    return nullptr;
  }

  return read_symbol();
}
|
2020-07-16 15:48:06 +01:00
|
|
|
|
2020-07-23 21:47:13 +01:00
|
|
|
/**
 * Read every expression left in the input and append it to `this->ast`.
 *
 * Whitespace between top-level expressions is skipped. Null results from
 * read_expr() are not stored. `this->ast` is not cleared here, so nodes
 * accumulate across calls.
 *
 * @return reference to the reader's `ast` member.
 */
ast_tree &Reader::read() {
  // get_char() reports end of input as EOF narrowed to `char`. Compare
  // against that same narrowed value so the loop also terminates on
  // platforms where `char` is unsigned.
  const char eof_char = static_cast<char>(EOF);

  for (char c = get_char(true); c != eof_char; c = get_char(true)) {
    // The character only signalled "more input"; let read_expr() see it.
    unget_char();

    auto node{read_expr()};
    if (node) {
      this->ast.push_back(std::move(node));
    }
  }

  return this->ast;
}
|
|
|
|
|
|
|
|
void Reader::dumpAST() {
|
|
|
|
ast_tree &ast = this->read();
|
|
|
|
std::string result = "";
|
|
|
|
for (auto &node : ast) {
|
|
|
|
result = fmt::format("{0} {1}", result, node->dumpAST());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * Load the whole of `file` into memory, pass it to the wrapped reader and
 * return the tree that reader produces.
 *
 * @return reference to the AST returned by `reader->read()`.
 * @throws ReadError when the file cannot be opened or its size cannot be
 *         determined.
 */
ast_tree &FileReader::read() {
  std::ifstream f(file.c_str());

  if (!f) {
    // NOTE(review): the message is a temporary std::string. If ReadError
    // keeps the pointer instead of copying the text, it dangles by the time
    // a handler runs - confirm against ReadError's definition.
    throw ReadError((char *)fmt::format("Can't find file '{}'", file).c_str());
  }

  f.seekg(0, std::ios::end);
  const std::streamoff size = f.tellg();

  // tellg() reports failure as -1. Passing that to the string would request
  // an enormous allocation.
  if (size < 0) {
    throw ReadError(const_cast<char *>("Can't determine the size of the file"));
  }

  std::string buffer(static_cast<std::string::size_type>(size), '\0');
  f.seekg(0);
  f.read(buffer.data(), static_cast<std::streamsize>(buffer.size()));

  // Keep only the bytes that were actually read. A short read (for example
  // from text-mode newline translation) would otherwise leave trailing NULs
  // in the input.
  buffer.resize(static_cast<std::string::size_type>(f.gcount()));
  f.close();

  reader->setInput(buffer);
  return reader->read();
}
|
|
|
|
|
|
|
|
void FileReader::dumpAST() {
|
|
|
|
ast_tree &ast = this->read();
|
|
|
|
std::string result = "";
|
|
|
|
for (auto &node : ast) {
|
|
|
|
result = fmt::format("{0} {1}", result, node->dumpAST());
|
|
|
|
}
|
|
|
|
cout << result << endl;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Delete the wrapped `reader`, then emit a log line. */
FileReader::~FileReader() {
  delete reader;
  READER_LOG("Destroying the file reader");
}
|
|
|
|
|
2020-07-23 21:47:13 +01:00
|
|
|
} // namespace serene
|