2020-06-20 22:14:48 +01:00
|
|
|
use crate::ast::Expr;
|
|
|
|
use crate::types::Number;
|
2020-06-14 21:25:18 +01:00
|
|
|
use std::io::{BufReader, Read};
|
2020-06-14 21:02:31 +01:00
|
|
|
|
2020-07-06 14:18:29 +01:00
|
|
|
/// Outcome of a single read step: the parsed `Expr` on success, or a
/// human-readable error message on failure.
///
/// The `'a` lifetime parameter does not appear in the aliased type.
pub type ReadResult<'a> = Result<Expr, String>;
|
2020-06-14 21:02:31 +01:00
|
|
|
|
|
|
|
/// Character-level reader state shared by all of the `read_*` methods.
pub struct ExprReader {
    /// Number of bytes pulled from the underlying reader so far; included in
    /// some error messages. Stored as `usize` because it is an offset and can
    /// never be negative.
    location: usize,
    /// Characters that were handed back via `unget_char`. They are served
    /// last-in-first-out before any new input is read.
    read_stack: Vec<char>,
}
|
|
|
|
|
|
|
|
impl ExprReader {
|
|
|
|
fn new() -> ExprReader {
|
2020-06-19 19:37:00 +01:00
|
|
|
ExprReader {
|
|
|
|
location: 0,
|
|
|
|
read_stack: vec![],
|
|
|
|
}
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
|
2020-06-19 19:37:00 +01:00
|
|
|
/// Retun a single character by reading from the `reader`. ,
|
|
|
|
///
|
|
|
|
/// # Arguments:
|
|
|
|
///
|
|
|
|
/// * `reader`: The buffer to read from.
|
|
|
|
/// * `skip_whitespace`: Whether or not to skip whitespace chars. *Bear in mind that
|
|
|
|
/// if you care about the newline char you should not skip the whitespaces*.
|
2020-06-14 21:25:18 +01:00
|
|
|
fn get_char<T: Read>(
|
|
|
|
&mut self,
|
|
|
|
reader: &mut BufReader<T>,
|
|
|
|
skip_whitespace: bool,
|
|
|
|
) -> Option<char> {
|
2020-06-14 21:02:31 +01:00
|
|
|
loop {
|
|
|
|
match self.read_stack.pop() {
|
2020-06-14 21:25:18 +01:00
|
|
|
Some(c) if !c.is_whitespace() || !skip_whitespace => return Some(c),
|
2020-06-14 21:02:31 +01:00
|
|
|
Some(_) => continue,
|
2020-06-14 21:25:18 +01:00
|
|
|
None => (),
|
2020-06-14 21:02:31 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
// Rust is weird, it doesn't provide a way to read from a buffer char by char.
|
|
|
|
let mut single_char_buff = [0];
|
|
|
|
let bytes_read = reader.read(&mut single_char_buff);
|
|
|
|
match bytes_read {
|
2020-06-19 19:37:00 +01:00
|
|
|
Ok(n) if n > 0 => self.location = self.location + 1,
|
2020-06-14 21:02:31 +01:00
|
|
|
Ok(_) => return None,
|
2020-06-14 21:25:18 +01:00
|
|
|
Err(_) => return None,
|
2020-06-14 21:02:31 +01:00
|
|
|
};
|
|
|
|
let ch = single_char_buff[0] as char;
|
|
|
|
|
|
|
|
match ch {
|
|
|
|
c if !c.is_whitespace() || !skip_whitespace => return Some(c),
|
2020-06-14 21:25:18 +01:00
|
|
|
_ => (),
|
2020-06-14 21:02:31 +01:00
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn unget_char(&mut self, c: char) {
|
|
|
|
self.read_stack.push(c);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Look ahead. AFAIK Rust doesn't provide any unread functoinality like Java input streams which
|
|
|
|
// sucks.
|
2020-06-14 21:25:18 +01:00
|
|
|
fn peek_char<T: Read>(
|
|
|
|
&mut self,
|
|
|
|
reader: &mut BufReader<T>,
|
|
|
|
skip_whitespace: bool,
|
|
|
|
) -> Option<char> {
|
2020-06-14 21:02:31 +01:00
|
|
|
match self.get_char(reader, skip_whitespace) {
|
|
|
|
Some(c) => {
|
|
|
|
self.unget_char(c);
|
|
|
|
Some(c)
|
2020-06-14 21:25:18 +01:00
|
|
|
}
|
|
|
|
None => None,
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn read_quoted_expr<T: Read>(&mut self, reader: &mut BufReader<T>) -> ReadResult {
|
|
|
|
let rest = self.read_expr(reader)?;
|
2020-06-20 16:25:05 +01:00
|
|
|
Ok(Expr::make_list(
|
|
|
|
Expr::make_symbol("quote".to_string()),
|
|
|
|
rest,
|
|
|
|
))
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
fn read_unquoted_expr<T: Read>(&mut self, reader: &mut BufReader<T>) -> ReadResult {
|
|
|
|
match self.peek_char(reader, true) {
|
|
|
|
Some('@') => {
|
|
|
|
// Move forward in the buffer since we peeked it
|
|
|
|
let _ = self.get_char(reader, true);
|
|
|
|
let rest = self.read_expr(reader)?;
|
2020-06-20 16:25:05 +01:00
|
|
|
Ok(Expr::make_list(
|
|
|
|
Expr::make_symbol("unquote-splicing".to_string()),
|
|
|
|
rest,
|
|
|
|
))
|
2020-06-14 21:25:18 +01:00
|
|
|
}
|
2020-06-14 21:02:31 +01:00
|
|
|
_ => {
|
|
|
|
let rest = self.read_expr(reader)?;
|
2020-06-20 16:25:05 +01:00
|
|
|
Ok(Expr::make_list(
|
|
|
|
Expr::make_symbol("unquote".to_string()),
|
|
|
|
rest,
|
|
|
|
))
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn read_quasiquoted_expr<T: Read>(&mut self, reader: &mut BufReader<T>) -> ReadResult {
|
|
|
|
let rest = self.read_expr(reader)?;
|
2020-06-20 16:25:05 +01:00
|
|
|
Ok(Expr::make_list(
|
|
|
|
Expr::make_symbol("quasiquote".to_string()),
|
|
|
|
rest,
|
|
|
|
))
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: We might want to replace Cons with an actual List struct
|
|
|
|
fn read_list<T: Read>(&mut self, reader: &mut BufReader<T>) -> ReadResult {
|
|
|
|
let first = match self.read_expr(reader) {
|
|
|
|
Ok(value) => value,
|
|
|
|
Err(e) => match self.get_char(reader, true) {
|
|
|
|
// is it an empty list ?
|
|
|
|
// TODO: we might want to return an actual empty list here
|
|
|
|
Some(')') => return Ok(Expr::Nil),
|
2020-06-14 21:25:18 +01:00
|
|
|
_ => return Err(e),
|
|
|
|
},
|
2020-06-14 21:02:31 +01:00
|
|
|
};
|
2020-06-20 15:20:05 +01:00
|
|
|
|
2020-06-14 21:02:31 +01:00
|
|
|
let rest = match self.get_char(reader, true) {
|
|
|
|
Some(e) => {
|
|
|
|
self.unget_char(e);
|
|
|
|
self.read_list(reader)?
|
2020-06-14 21:25:18 +01:00
|
|
|
}
|
2020-06-19 19:37:00 +01:00
|
|
|
None => return Err("Unexpected EOF while parsing a list.".to_string()),
|
2020-06-14 21:02:31 +01:00
|
|
|
};
|
|
|
|
|
2020-06-20 16:25:05 +01:00
|
|
|
Ok(Expr::make_list(first, rest))
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
fn is_valid_for_identifier(&self, c: char) -> bool {
|
|
|
|
match c {
|
2020-06-14 21:25:18 +01:00
|
|
|
'!'
|
|
|
|
| '$'
|
|
|
|
| '%'
|
|
|
|
| '&'
|
|
|
|
| '*'
|
|
|
|
| '+'
|
|
|
|
| '-'
|
|
|
|
| '.'
|
|
|
|
| '~'
|
|
|
|
| '/'
|
|
|
|
| ':'
|
|
|
|
| '<'
|
|
|
|
| '='
|
|
|
|
| '>'
|
|
|
|
| '?'
|
|
|
|
| '@'
|
|
|
|
| '^'
|
|
|
|
| '_'
|
|
|
|
| 'a'..='z'
|
|
|
|
| 'A'..='Z'
|
|
|
|
| '0'..='9' => true,
|
|
|
|
_ => false,
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn _read_symbol<T: Read>(&mut self, reader: &mut BufReader<T>) -> ReadResult {
|
|
|
|
let mut symbol = match self.peek_char(reader, false) {
|
|
|
|
Some(e) if self.is_valid_for_identifier(e) => {
|
|
|
|
// Read into string
|
|
|
|
let ch = self.get_char(reader, false).unwrap();
|
|
|
|
let mut s = String::new();
|
|
|
|
s.push(ch);
|
|
|
|
s
|
2020-06-14 21:25:18 +01:00
|
|
|
}
|
2020-06-14 21:02:31 +01:00
|
|
|
Some(e) => {
|
2020-06-14 21:25:18 +01:00
|
|
|
return Err(format!(
|
2020-06-19 19:37:00 +01:00
|
|
|
"Unexpected character: got '{}', expected a symbol at {}",
|
|
|
|
e, self.location
|
2020-06-14 21:25:18 +01:00
|
|
|
))
|
|
|
|
}
|
|
|
|
None => return Err("Unexpected EOF".to_string()),
|
2020-06-14 21:02:31 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
loop {
|
|
|
|
match self.get_char(reader, false) {
|
|
|
|
Some(v) if self.is_valid_for_identifier(v) => symbol.push(v),
|
|
|
|
Some(v) => {
|
|
|
|
self.unget_char(v);
|
|
|
|
break;
|
2020-06-14 21:25:18 +01:00
|
|
|
}
|
|
|
|
None => break,
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-20 16:25:05 +01:00
|
|
|
Ok(Expr::make_symbol(symbol))
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
fn read_escape_char<T: Read>(&mut self, reader: &mut BufReader<T>) -> Option<char> {
|
|
|
|
match self.get_char(reader, false) {
|
|
|
|
Some(e) => match e {
|
|
|
|
'\"' => Some('\"'),
|
|
|
|
'\'' => Some('\''),
|
|
|
|
'\\' => Some('\\'),
|
|
|
|
'n' => Some('\n'),
|
|
|
|
'r' => Some('\r'),
|
|
|
|
't' => Some('\t'),
|
2020-06-14 21:25:18 +01:00
|
|
|
_ => None,
|
2020-06-14 21:02:31 +01:00
|
|
|
},
|
2020-06-14 21:25:18 +01:00
|
|
|
None => None,
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn read_string<T: Read>(&mut self, reader: &mut BufReader<T>) -> ReadResult {
|
|
|
|
// Skip the " char
|
|
|
|
let _ = self.get_char(reader, false);
|
|
|
|
|
|
|
|
let mut string = "".to_string();
|
|
|
|
loop {
|
|
|
|
match self.get_char(reader, false) {
|
|
|
|
Some(e) => match e {
|
|
|
|
'\"' => break,
|
|
|
|
'\\' => match self.read_escape_char(reader) {
|
|
|
|
Some(v) => string.push(v),
|
2020-06-14 21:25:18 +01:00
|
|
|
None => return Err(format!("Unexpected char to escape, got {}", e)),
|
2020-06-14 21:02:31 +01:00
|
|
|
},
|
|
|
|
//'\n' => return Err("Unescaped newlines are not allowed in string literals".to_string()),
|
2020-06-14 21:25:18 +01:00
|
|
|
_ => string.push(e),
|
2020-06-14 21:02:31 +01:00
|
|
|
},
|
2020-06-14 21:25:18 +01:00
|
|
|
None => return Err("Unexpected EOF while scanning a string".to_string()),
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
}
|
2020-06-20 16:25:05 +01:00
|
|
|
Ok(Expr::make_string(string))
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
fn read_number<T: Read>(&mut self, reader: &mut BufReader<T>, neg: bool) -> ReadResult {
|
|
|
|
let mut is_double = false;
|
2020-06-14 21:25:18 +01:00
|
|
|
let mut string = (if neg { "-" } else { "" }).to_string();
|
2020-06-14 21:23:39 +01:00
|
|
|
|
2020-06-14 21:02:31 +01:00
|
|
|
loop {
|
|
|
|
match self.get_char(reader, false) {
|
2020-06-14 21:25:18 +01:00
|
|
|
Some(e) if e == '.' && is_double => {
|
|
|
|
return Err("A double with more that one '.' ???".to_string())
|
|
|
|
}
|
2020-06-14 21:02:31 +01:00
|
|
|
Some(e) if e == '.' => {
|
|
|
|
is_double = true;
|
|
|
|
string.push(e);
|
|
|
|
}
|
|
|
|
Some(e) if e.is_digit(10) => string.push(e),
|
|
|
|
Some(e) => {
|
|
|
|
self.unget_char(e);
|
|
|
|
break;
|
|
|
|
}
|
2020-06-14 21:25:18 +01:00
|
|
|
None => break,
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
}
|
2020-06-20 22:14:48 +01:00
|
|
|
|
|
|
|
// TODO: Move this to ast module and use the `new` function on
|
|
|
|
// Number struct
|
2020-06-14 21:02:31 +01:00
|
|
|
if is_double {
|
2020-06-20 16:25:05 +01:00
|
|
|
Ok(Expr::make_number(Number::Float(
|
|
|
|
string.parse::<f64>().unwrap(),
|
|
|
|
)))
|
2020-06-14 21:02:31 +01:00
|
|
|
} else {
|
2020-06-20 16:25:05 +01:00
|
|
|
Ok(Expr::make_number(Number::Integer(
|
|
|
|
string.parse::<i64>().unwrap(),
|
|
|
|
)))
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Read an atom: a string literal, a number, or a symbol.
///
/// Dispatches on the next non-whitespace character:
///
/// * `"` starts a string literal (`read_string`).
/// * A decimal digit starts a number (`read_number`).
/// * `-` starts a negative number only when a digit follows it
///   *immediately*; otherwise the `-` is handed back and read as part of a
///   symbol. The look-ahead deliberately does not skip whitespace, so `- 5`
///   is the symbol `-` followed by `5`, not the number `-5`.
/// * Anything else is read as a symbol (`_read_symbol`).
///
/// # Errors
///
/// Returns `"Unexpected EOF while scanning atom"` when no character is
/// available, or whatever error the delegated reader produces.
fn read_symbol<T: Read>(&mut self, reader: &mut BufReader<T>) -> ReadResult {
    match self.peek_char(reader, true) {
        None => Err("Unexpected EOF while scanning atom".to_string()),
        Some('"') => self.read_string(reader),
        Some(c) if c.is_digit(10) => self.read_number(reader, false),
        // TODO: keyword literals (':' prefix) are not handled yet.
        Some('-') => {
            // Read the '-' char
            let _ = self.get_char(reader, true);
            match self.peek_char(reader, false) {
                Some(next) if next.is_digit(10) => self.read_number(reader, true),
                _ => {
                    // Not a number: give the '-' back so it becomes part of the symbol.
                    self.unget_char('-');
                    self._read_symbol(reader)
                }
            }
        }
        Some(_) => self._read_symbol(reader),
    }
}
|
|
|
|
|
2020-06-19 19:37:00 +01:00
|
|
|
pub fn ignore_comments<T: Read>(&mut self, reader: &mut BufReader<T>) -> ReadResult {
|
|
|
|
match self.get_char(reader, false) {
|
|
|
|
Some(c) => match c {
|
|
|
|
'\n' => Ok(Expr::Comment),
|
|
|
|
_ => self.ignore_comments(reader),
|
|
|
|
},
|
|
|
|
None => Ok(Expr::Comment),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-14 21:02:31 +01:00
|
|
|
pub fn read_expr<T: Read>(&mut self, reader: &mut BufReader<T>) -> ReadResult {
|
|
|
|
match self.get_char(reader, true) {
|
|
|
|
Some(c) => {
|
|
|
|
match c {
|
|
|
|
'\'' => self.read_quoted_expr(reader),
|
|
|
|
'~' => self.read_unquoted_expr(reader),
|
|
|
|
'`' => self.read_quasiquoted_expr(reader),
|
|
|
|
'(' => self.read_list(reader),
|
2020-06-19 19:37:00 +01:00
|
|
|
';' => self.ignore_comments(reader),
|
2020-06-14 21:02:31 +01:00
|
|
|
//'[' => self.read_vector(reader),
|
|
|
|
//'{' => self.read_map(reader),
|
2020-06-14 21:25:18 +01:00
|
|
|
_ => {
|
2020-06-14 21:02:31 +01:00
|
|
|
self.unget_char(c);
|
|
|
|
self.read_symbol(reader)
|
|
|
|
}
|
|
|
|
}
|
2020-06-14 21:25:18 +01:00
|
|
|
}
|
|
|
|
None => Ok(Expr::NoMatch),
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-14 21:25:18 +01:00
|
|
|
pub fn read_from_buffer<T: Read>(
|
|
|
|
&mut self,
|
|
|
|
reader: &mut BufReader<T>,
|
|
|
|
) -> Result<Vec<Expr>, String> {
|
2020-06-14 21:02:31 +01:00
|
|
|
let mut ast = vec![];
|
|
|
|
loop {
|
|
|
|
match self.read_expr(reader) {
|
|
|
|
Ok(Expr::NoMatch) => break,
|
|
|
|
Err(v) => return Err(v),
|
2020-06-19 19:37:00 +01:00
|
|
|
Ok(Expr::Comment) => continue,
|
2020-06-14 21:25:18 +01:00
|
|
|
Ok(v) => ast.push(v),
|
2020-06-14 21:02:31 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(ast)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Read every expression contained in `string`.
///
/// The bytes of `string` are wrapped in a single `BufReader` and handed to
/// `read_from_buffer`; its result is returned unchanged.
pub fn read(&mut self, string: &str) -> Result<Vec<Expr>, String> {
    // A byte slice already implements `Read`; one buffering layer is enough.
    let mut buf_reader = BufReader::new(string.as_bytes());
    self.read_from_buffer(&mut buf_reader)
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn read_string(input: &str) -> Result<Vec<Expr>, String> {
|
|
|
|
let mut reader = ExprReader::new();
|
|
|
|
reader.read(input)
|
|
|
|
}
|