543 lines
12 KiB
Go
543 lines
12 KiB
Go
/*
|
|
Serene --- Yet an other Lisp
|
|
|
|
Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
package core
|
|
|
|
// Parser Implementation:
|
|
// * `ParseToAST` is the entry point of the parser
|
|
// * It's a manual parser with look ahead factor of (1)
|
|
// * It parses the input string to a tree of `IExpr`s
|
|
//
|
|
// TODOs:
|
|
// * Add a shortcut for anonymous functions similar to `#(...)` clojure
|
|
// syntax
|
|
// * Add the support for strings
|
|
// * Add the support for keywords
|
|
// * Add a shortcut for the `deref` function like `@x` => `(deref x)`
|
|
|
|
import (
|
|
"strings"
|
|
"unicode"
|
|
)
|
|
|
|
// validChars holds the punctuation characters that can be used in a symbol
// (letters and digits are checked separately by `isValidForSymbol`).
var validChars = []rune("!$%&*+-.~/:<=>?@^_")
|
|
|
|
// IParsable defines the common interface which any parser has to implement.
type IParsable interface {
	// next reads the next character in the buffer and moves the position
	// forward. When skipWhitespace is true it jumps over whitespace and
	// anything that is conceptually equivalent to a whitespace, like '\n'.
	// The readers in this file treat a nil result as the end of the input.
	next(skipWhitespace bool) *string

	// peek is similar to `next` but it doesn't consume the character it
	// returns: an immediate `next` after a `peek` reads the same character
	// (and moves the position), and a series of `peek` calls reads the same
	// character over and over again.
	peek(skipWhitespace bool) *string

	// back moves the position back by one in the buffer.
	back()

	// GetLocation returns the current position in the buffer.
	GetLocation() int

	// Buffer returns a pointer to the buffer the parser reads from.
	Buffer() *[]string
}
|
|
|
|
// StringParser is the IParsable implementation that operates on strings,
// or to put it simply: it parses input strings.
type StringParser struct {
	// buffer is the input, one element per character (`ParseToAST` fills
	// it with `strings.Split(input, "")`).
	buffer []string

	// pos is the index in `buffer` of the next character to read.
	pos int
}
|
|
|
|
// Implementing IParsable for StringParser ---
|
|
|
|
// Returns the next character in the buffer
|
|
func (sp *StringParser) next(skipWhitespace bool) *string {
|
|
if sp.pos >= len(sp.buffer) {
|
|
return nil
|
|
}
|
|
char := sp.buffer[sp.pos]
|
|
sp.pos = sp.pos + 1
|
|
|
|
if skipWhitespace && isSeparator(&char) {
|
|
return sp.next(skipWhitespace)
|
|
}
|
|
|
|
return &char
|
|
}
|
|
|
|
// isSeparator returns a boolean indicating whether the given character `c`
// is a separator or not. In a Lisp, whitespace and some other characters are
// conceptually the same and we need to treat them the same as well.
//
// Only the first rune of `*c` is inspected. A nil pointer or an empty string
// is never a separator (an empty string used to cause an index-out-of-range
// panic).
func isSeparator(c *string) bool {
	if c == nil || *c == "" {
		return false
	}

	switch []rune(*c)[0] {
	case ' ', '\t', '\n', '\f':
		return true
	default:
		return false
	}
}
|
|
|
|
// Return the character of the buffer without consuming it
|
|
func (sp *StringParser) peek(skipWhitespace bool) *string {
|
|
if sp.pos >= len(sp.buffer) {
|
|
return nil
|
|
}
|
|
|
|
c := sp.buffer[sp.pos]
|
|
if isSeparator(&c) && skipWhitespace {
|
|
sp.pos = sp.pos + 1
|
|
return sp.peek(skipWhitespace)
|
|
}
|
|
return &c
|
|
}
|
|
|
|
// Move the char pointer back by one character
|
|
func (sp *StringParser) back() {
|
|
if sp.pos > 0 {
|
|
sp.pos = sp.pos - 1
|
|
}
|
|
}
|
|
|
|
func (sp *StringParser) GetLocation() int {
|
|
return sp.pos
|
|
}
|
|
|
|
func (sp *StringParser) Buffer() *[]string {
|
|
return &sp.buffer
|
|
}
|
|
|
|
// END: IParsable ---
|
|
|
|
// makeErrorAtPoint is a helper function which generates an `IError` that
|
|
// points at the current position of the buffer.
|
|
func makeErrorAtPoint(p IParsable, msg string, a ...interface{}) IError {
|
|
n := MakeSinglePointNode(p.Buffer(), p.GetLocation())
|
|
return MakeParsetimeErrorf(n, msg, a...)
|
|
}
|
|
|
|
// makeErrorFromError is a function which wraps a Golang error in an IError
|
|
func makeErrorFromError(parser IParsable, e error) IError {
|
|
return makeErrorAtPoint(parser, "%w", e)
|
|
}
|
|
|
|
// contains reports whether the rune `c` is an element of `s`.
func contains(s []rune, c rune) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return true
		}
	}

	return false
}
|
|
|
|
func isValidForSymbol(char string) bool {
|
|
c := rune(char[0])
|
|
return contains(validChars, c) || unicode.IsLetter(c) || unicode.IsDigit(c)
|
|
}
|
|
|
|
func readKeyword(parser IParsable) (IExpr, IError) {
|
|
symbol, err := readRawSymbol(parser)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
node := MakeNodeFromExpr(symbol)
|
|
return MakeKeyword(node, ":"+symbol.(*Symbol).String())
|
|
}
|
|
|
|
//readRawSymbol reads a symbol from the current position forward
|
|
func readRawSymbol(parser IParsable) (IExpr, IError) {
|
|
c := parser.peek(false)
|
|
var symbol string
|
|
|
|
if c == nil {
|
|
return nil, makeErrorAtPoint(parser, "unexpected enf of file while parsing a symbol")
|
|
}
|
|
|
|
// Does the symbol starts with a valid character or not
|
|
if isValidForSymbol(*c) {
|
|
parser.next(false)
|
|
symbol = *c
|
|
} else {
|
|
return nil, makeErrorAtPoint(parser,
|
|
"unexpected character: got '%s', expected a symbol at %d",
|
|
*c,
|
|
parser.GetLocation(),
|
|
)
|
|
}
|
|
|
|
// read the rest of the symbol
|
|
for {
|
|
c := parser.next(false)
|
|
|
|
if c == nil {
|
|
break
|
|
}
|
|
|
|
if isValidForSymbol(*c) {
|
|
symbol = symbol + *c
|
|
} else {
|
|
parser.back()
|
|
break
|
|
}
|
|
}
|
|
|
|
node := MakeNode(parser.Buffer(), parser.GetLocation()-len(symbol), parser.GetLocation())
|
|
sym, err := MakeSymbol(node, symbol)
|
|
|
|
if err != nil {
|
|
err.SetNode(&node)
|
|
return nil, err
|
|
}
|
|
|
|
return sym, nil
|
|
}
|
|
|
|
func readString(parser IParsable) (IExpr, IError) {
|
|
str := ""
|
|
|
|
for {
|
|
c := parser.next(false)
|
|
if c == nil {
|
|
return nil, makeErrorAtPoint(parser, "reached end of file while scanning a string")
|
|
}
|
|
|
|
if *c == "\"" {
|
|
node := MakeNode(parser.Buffer(), parser.GetLocation()-len(str), parser.GetLocation())
|
|
return MakeString(node, str), nil
|
|
}
|
|
|
|
if *c == "\\" {
|
|
c = parser.next(false)
|
|
switch *c {
|
|
case "n":
|
|
str = str + "\n"
|
|
case "t":
|
|
str = str + "\t"
|
|
case "r":
|
|
str = str + "\r"
|
|
case "\\":
|
|
str = str + "\\"
|
|
case "\"":
|
|
str = str + "\""
|
|
default:
|
|
return nil, makeErrorAtPoint(parser, "Unsupported escape character: \\%s", *c)
|
|
}
|
|
} else {
|
|
str = str + *c
|
|
}
|
|
}
|
|
}
|
|
|
|
// readNumber reads a number with respect to its sign and whether it's, a ...interface{}
|
|
// a decimal or a float
|
|
func readNumber(parser IParsable, neg bool) (IExpr, IError) {
|
|
isDouble := false
|
|
result := ""
|
|
|
|
if neg {
|
|
result = "-"
|
|
}
|
|
|
|
for {
|
|
c := parser.next(false)
|
|
|
|
if c == nil {
|
|
break
|
|
}
|
|
|
|
if *c == "." && isDouble {
|
|
return nil, makeErrorAtPoint(parser, "a double with more that one '.' ???")
|
|
}
|
|
|
|
if *c == "." {
|
|
isDouble = true
|
|
result = result + *c
|
|
continue
|
|
}
|
|
|
|
// Weird, But go won't stop complaining without this swap
|
|
char := *c
|
|
r := rune(char[0])
|
|
if unicode.IsDigit(r) {
|
|
result = result + *c
|
|
} else {
|
|
parser.back()
|
|
break
|
|
}
|
|
}
|
|
|
|
value, err := MakeNumberFromStr(result, isDouble)
|
|
|
|
if err != nil {
|
|
return nil, makeErrorFromError(parser, err)
|
|
}
|
|
|
|
return value, nil
|
|
}
|
|
|
|
// readSymbol reads a symbol and return the appropriate type of expression
|
|
// based on the symbol conditions. For example it will read a number if the
|
|
// symbol starts with a number or a neg sign or a string if it starts with '\"'
|
|
// and a raw symbol otherwise
|
|
func readSymbol(parser IParsable) (IExpr, IError) {
|
|
c := parser.peek(false)
|
|
|
|
if c == nil {
|
|
return nil, makeErrorAtPoint(parser, "unexpected end of file while scanning a symbol")
|
|
}
|
|
|
|
if *c == "\"" {
|
|
parser.next(false)
|
|
return readString(parser)
|
|
}
|
|
|
|
// Weird, But go won't stop complaining without this swap
|
|
char := *c
|
|
r := rune(char[0])
|
|
if unicode.IsDigit(r) {
|
|
return readNumber(parser, false)
|
|
}
|
|
|
|
if *c == "-" {
|
|
parser.next(true)
|
|
c := parser.peek(false)
|
|
|
|
// Weird, But go won't stop complaining without this swap
|
|
char := *c
|
|
r := rune(char[0])
|
|
|
|
if unicode.IsDigit(r) {
|
|
return readNumber(parser, true)
|
|
} else {
|
|
// Unread '-'
|
|
parser.back()
|
|
return readRawSymbol(parser)
|
|
}
|
|
|
|
}
|
|
return readRawSymbol(parser)
|
|
}
|
|
|
|
// readList reads a List recursively.
|
|
func readList(parser IParsable) (IExpr, IError) {
|
|
list := []IExpr{}
|
|
|
|
for {
|
|
c := parser.peek(true)
|
|
if c == nil {
|
|
return nil, makeErrorAtPoint(parser, "reaching the end of file while reading a list")
|
|
}
|
|
if *c == ")" {
|
|
parser.next(true)
|
|
break
|
|
} else {
|
|
val, err := readExpr(parser)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
list = append(list, val)
|
|
|
|
}
|
|
}
|
|
|
|
return MakeList(list), nil
|
|
}
|
|
|
|
func readComment(parser IParsable) (IExpr, IError) {
|
|
for {
|
|
c := parser.next(false)
|
|
if c == nil || *c == "\n" {
|
|
return nil, nil
|
|
}
|
|
}
|
|
}
|
|
|
|
// readQuotedExpr reads quoted expression ( lie 'something ) by replaceing the
|
|
// quote with a call to `quote` special form so 'something => (quote something)
|
|
func readQuotedExpr(parser IParsable) (IExpr, IError) {
|
|
expr, err := readExpr(parser)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
symNode := MakeNode(parser.Buffer(), parser.GetLocation(), parser.GetLocation())
|
|
sym, err := MakeSymbol(symNode, "quote")
|
|
|
|
if err != nil {
|
|
err.SetNode(&symNode)
|
|
return nil, err
|
|
}
|
|
|
|
return MakeList([]IExpr{
|
|
sym,
|
|
expr,
|
|
}), nil
|
|
}
|
|
|
|
// readUnquotedExpr reads different unquoting expressions from their short representaions.
|
|
// ~a => (unquote a)
|
|
// ~@a => (unquote-splicing a)
|
|
// Note: `unquote` and `unquote-splicing` are not global functions or special, they are bounded
|
|
// to quasiquoted experssions only.
|
|
func readUnquotedExpr(parser IParsable) (IExpr, IError) {
|
|
c := parser.peek(true)
|
|
|
|
if c == nil {
|
|
return nil, makeErrorAtPoint(parser, "end of file while reading an unquoted expression")
|
|
}
|
|
|
|
var sym IExpr
|
|
var err IError
|
|
var expr IExpr
|
|
|
|
node := MakeNode(parser.Buffer(), parser.GetLocation(), parser.GetLocation())
|
|
|
|
if *c == "@" {
|
|
parser.next(true)
|
|
sym, err = MakeSymbol(node, "unquote-splicing")
|
|
if err != nil {
|
|
err.SetNode(&node)
|
|
} else {
|
|
expr, err = readExpr(parser)
|
|
}
|
|
|
|
} else {
|
|
sym, err = MakeSymbol(node, "unquote")
|
|
if err != nil {
|
|
err.SetNode(&node)
|
|
} else {
|
|
expr, err = readExpr(parser)
|
|
}
|
|
}
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return MakeList([]IExpr{sym, expr}), nil
|
|
}
|
|
|
|
// readQuasiquotedExpr reads the backquote and replace it with a call
|
|
// to the `quasiquote` macro.
|
|
func readQuasiquotedExpr(parser IParsable) (IExpr, IError) {
|
|
expr, err := readExpr(parser)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
node := MakeNode(parser.Buffer(), parser.GetLocation(), parser.GetLocation())
|
|
sym, err := MakeSymbol(node, "quasiquote")
|
|
if err != nil {
|
|
err.SetNode(&node)
|
|
return nil, err
|
|
}
|
|
return MakeList([]IExpr{
|
|
sym,
|
|
expr,
|
|
}), nil
|
|
}
|
|
|
|
// readExpr reads one expression from the input. This function is the most
|
|
// important function in the parser which dispatches the call to different
|
|
// reader functions based on the first character
|
|
func readExpr(parser IParsable) (IExpr, IError) {
|
|
|
|
loop:
|
|
c := parser.next(true)
|
|
|
|
if c == nil {
|
|
// We're done reading
|
|
return nil, nil
|
|
}
|
|
|
|
if *c == "'" {
|
|
return readQuotedExpr(parser)
|
|
}
|
|
|
|
if *c == "~" {
|
|
return readUnquotedExpr(parser)
|
|
}
|
|
|
|
if *c == "`" {
|
|
return readQuasiquotedExpr(parser)
|
|
}
|
|
if *c == "(" {
|
|
return readList(parser)
|
|
}
|
|
if *c == ";" {
|
|
readComment(parser)
|
|
goto loop
|
|
}
|
|
|
|
if *c == ":" {
|
|
return readKeyword(parser)
|
|
}
|
|
// if *c == "[" {
|
|
// readVector(parser)
|
|
// }
|
|
|
|
// if *c == "{" {
|
|
// readMap(parser)
|
|
// }
|
|
parser.back()
|
|
return readSymbol(parser)
|
|
|
|
}
|
|
|
|
//ParseToAST is the entry function to the reader/parser which
|
|
// converts the `input` string to a `Block` of code. A block
|
|
// by itself is not something available to the language. It's
|
|
// just anbstraction for a ordered collection of expressions.
|
|
// It doesn't have anything to do with the concept of blocks
|
|
// from other programming languages
|
|
func ParseToAST(input string) (*Block, IError) {
|
|
|
|
var ast Block
|
|
parser := StringParser{
|
|
buffer: strings.Split(input, ""),
|
|
pos: 0,
|
|
}
|
|
|
|
for {
|
|
expr, err := readExpr(&parser)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if expr == nil {
|
|
break
|
|
}
|
|
|
|
ast.Append(expr)
|
|
}
|
|
|
|
return &ast, nil
|
|
}
|