/* Serene --- Yet an other Lisp Copyright (c) 2020 Sameer Rahmani This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 2 of the License. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ package core // Parser Implementation: // * `ParseToAST` is the entry point of the parser // * It's a manual parser with look ahead factor of (1) // * It parsers the input string to a tree of `IEpxr`s // // TODOs: // * Add a shortcut for anonymous functions similar to `#(...)` clojure // syntax // * Add the support for strings // * Add the support for kewords // * Add a shortcut for the `deref` function like `@x` => `(deref x)` // * A line of comment at the end of a list definition causes a synxtax error. // We need to fix it. For example: // (asdb xyz // ;; problematic comment line // ) // Will fails. The reason being we call `readExpr` in `readList` and in the // `readExpr` when we read a line of comment we jump to a label and try to // read another expr which in our case it would read the end of list and throw // and error import ( "strings" "unicode" "serene-lang.org/bootstrap/pkg/ast" ) // An array of the valid characters that be be used in a symbol var validChars = []rune{'!', '$', '%', '&', '*', '+', '-', '.', '~', '/', ':', '<', '=', '>', '?', '@', '^', '_'} // IParsable defines the common interface which any parser has to implement. 
type IParsable interface {
	// Reads the next character in the buffer with respect to the
	// skipWhitespace parameter, which basically jumps over whitespace and
	// some conceptual equivalent of a whitespace like '\n'. Returns nil when
	// there is nothing left to read.
	next(skipWhitespace bool) *string

	// Similar to `next` but it won't consume the character it returns, so an
	// immediate `next` call after a `peek` will read the same char but will
	// move the position, and a series of `peek` calls will read the same
	// character over and over again.
	// Note: when skipWhitespace is true the `StringParser` implementation
	// does advance the position past the separators it skips.
	peek(skipWhitespace bool) *string

	// Moves back the position by one in the buffer.
	back()

	// Returns the current position in the buffer
	GetLocation() int

	// Returns the source description (buffer, namespace and line index) of
	// the input being parsed.
	GetSource() *ast.Source

	// Returns a pointer to the underlying buffer of characters.
	Buffer() *[]string
}

// StringParser is an implementation of the IParsable that operates on strings.
// To put it simply it parses input strings
type StringParser struct {
	// The input, one element per character (see `ParseToAST`, which fills
	// it with `strings.Split(input, "")`).
	buffer []string
	// Index of the next character to read in `buffer`.
	pos int
	// The namespace given to `ParseToAST`; exposed as `NS` by `GetSource`.
	source string

	// This slice holds the boundaries of lines in the buffer. Basically
	// each element determines the position at which a line ends and the line
	// number directly maps to the position of its boundary in the slice.
	lineIndex []int
}

// Implementing IParsable for StringParser ---

// updateLineIndex reads the current character and if it is an end of line, then
// it will update the line index to add the boundaries of the current line.
func (sp *StringParser) updateLineIndex(pos int) { if pos < len(sp.buffer) { c := sp.buffer[pos] if c == "\n" { if len(sp.lineIndex) > 0 { if sp.lineIndex[len(sp.lineIndex)-1] != pos+1 { // Including the \n itself sp.lineIndex = append(sp.lineIndex, pos+1) } } else { sp.lineIndex = append(sp.lineIndex, pos+1) } } } } // Returns the next character in the buffer func (sp *StringParser) next(skipWhitespace bool) *string { if sp.pos >= len(sp.buffer) { return nil } char := sp.buffer[sp.pos] sp.updateLineIndex(sp.pos) sp.pos = sp.pos + 1 if skipWhitespace && isSeparator(&char) { return sp.next(skipWhitespace) } return &char } // isSeparator returns a boolean indicating whether the given character `c` // contains a separator or not. In a Lisp whitespace and someother characters // are conceptually the same and we need to treat them the same as well. func isSeparator(c *string) bool { if c == nil { return false } r := []rune(*c)[0] if r == ' ' || r == '\t' || r == '\n' || r == '\f' { return true } return false } // Return the character of the buffer without consuming it func (sp *StringParser) peek(skipWhitespace bool) *string { if sp.pos >= len(sp.buffer) { return nil } c := sp.buffer[sp.pos] if isSeparator(&c) && skipWhitespace { sp.updateLineIndex(sp.pos) sp.pos = sp.pos + 1 return sp.peek(skipWhitespace) } return &c } // Move the char pointer back by one character func (sp *StringParser) back() { if sp.pos > 0 { sp.pos = sp.pos - 1 } } func (sp *StringParser) GetLocation() int { return sp.pos } func (sp *StringParser) GetSource() *ast.Source { return &ast.Source{ Buffer: &sp.buffer, NS: sp.source, LineIndex: &sp.lineIndex, } } func (sp *StringParser) Buffer() *[]string { return &sp.buffer } // END: IParsable --- // makeErrorAtPoint is a helper function which generates an `IError` that // points at the current position of the buffer. 
func makeErrorAtPoint(p IParsable, msg string, a ...interface{}) IError { n := MakeSinglePointNode(p.GetSource(), p.GetLocation()) return MakeSyntaxErrorf(n, msg, a...) } // makeErrorFromError is a function which wraps a Golang error in an IError func makeErrorFromError(parser IParsable, e error) IError { return makeErrorAtPoint(parser, "%w", e) } func contains(s []rune, c rune) bool { for _, v := range s { if v == c { return true } } return false } func isValidForSymbol(char string) bool { c := rune(char[0]) return contains(validChars, c) || unicode.IsLetter(c) || unicode.IsDigit(c) } func readKeyword(parser IParsable) (IExpr, IError) { symbol, err := readRawSymbol(parser) if err != nil { return nil, err } node := MakeNodeFromExpr(symbol) return MakeKeyword(node, ":"+symbol.(*Symbol).String()) } //readRawSymbol reads a symbol from the current position forward func readRawSymbol(parser IParsable) (IExpr, IError) { c := parser.peek(false) var symbol string if c == nil { return nil, makeErrorAtPoint(parser, "unexpected enf of file while parsing a symbol") } // Does the symbol starts with a valid character or not if isValidForSymbol(*c) { parser.next(false) symbol = *c } else { return nil, makeErrorAtPoint(parser, "unexpected character: got '%s', expected a symbol at %d", *c, parser.GetLocation(), ) } // read the rest of the symbol for { c := parser.next(false) if c == nil { break } if isValidForSymbol(*c) { symbol = symbol + *c } else { parser.back() break } } node := MakeNode(parser.GetSource(), parser.GetLocation()-len(symbol), parser.GetLocation()) sym, err := MakeSymbol(node, symbol) if err != nil { err.SetNode(&node) return nil, err } return sym, nil } func readString(parser IParsable) (IExpr, IError) { str := "" for { c := parser.next(false) if c == nil { return nil, makeErrorAtPoint(parser, "reached end of file while scanning a string") } if *c == "\"" { node := MakeNode(parser.GetSource(), parser.GetLocation()-len(str), parser.GetLocation()) return 
MakeString(node, str), nil } if *c == "\\" { c = parser.next(false) switch *c { case "n": str = str + "\n" case "t": str = str + "\t" case "r": str = str + "\r" case "\\": str = str + "\\" case "\"": str = str + "\"" default: return nil, makeErrorAtPoint(parser, "Unsupported escape character: \\%s", *c) } } else { str = str + *c } } } // readNumber reads a number with respect to its sign and whether it's, a ...interface{} // a decimal or a float func readNumber(parser IParsable, neg bool) (IExpr, IError) { isDouble := false result := "" if neg { result = "-" } for { c := parser.next(false) if c == nil { break } if *c == "." && isDouble { return nil, makeErrorAtPoint(parser, "a double with more that one '.' ???") } if *c == "." { isDouble = true result = result + *c continue } // Weird, But go won't stop complaining without this swap char := *c r := rune(char[0]) if unicode.IsDigit(r) { result = result + *c } else if isValidForSymbol(char) { return nil, makeErrorAtPoint(parser, "Illegal token while scanning for a number.") } else { parser.back() break } } n := MakeSinglePointNode(parser.GetSource(), parser.GetLocation()) n.location.DecStart(len(result)) value, err := MakeNumberFromStr(n, result, isDouble) if err != nil { return nil, makeErrorFromError(parser, err) } return value, nil } // readSymbol reads a symbol and return the appropriate type of expression // based on the symbol conditions. 
For example it will read a number if the // symbol starts with a number or a neg sign or a string if it starts with '\"' // and a raw symbol otherwise func readSymbol(parser IParsable) (IExpr, IError) { c := parser.peek(false) if c == nil { return nil, makeErrorAtPoint(parser, "unexpected end of file while scanning a symbol") } if *c == "\"" { parser.next(false) return readString(parser) } // Weird, But go won't stop complaining without this swap char := *c r := rune(char[0]) if unicode.IsDigit(r) { return readNumber(parser, false) } if *c == "-" { parser.next(true) c := parser.peek(false) // Weird, But go won't stop complaining without this swap char := *c r := rune(char[0]) if unicode.IsDigit(r) { return readNumber(parser, true) } else { // Unread '-' parser.back() return readRawSymbol(parser) } } return readRawSymbol(parser) } // readList reads a List recursively. func readList(parser IParsable) (IExpr, IError) { list := []IExpr{} for { c := parser.peek(true) if c == nil { return nil, makeErrorAtPoint(parser, "reaching the end of file while reading a list") } if *c == ")" { parser.next(true) break } else { val, err := readExpr(parser) if err != nil { return nil, err } list = append(list, val) } } node := MakeNodeFromExprs(list) if node == nil { n := MakeSinglePointNode(parser.GetSource(), parser.GetLocation()) node = &n } node.location.DecStart(1) node.location.IncEnd(1) return MakeList(*node, list), nil } func readComment(parser IParsable) (IExpr, IError) { for { c := parser.next(false) if c == nil || *c == "\n" { return nil, nil } } } // readQuotedExpr reads quoted expression ( lie 'something ) by replaceing the // quote with a call to `quote` special form so 'something => (quote something) func readQuotedExpr(parser IParsable) (IExpr, IError) { expr, err := readExpr(parser) if err != nil { return nil, err } symNode := MakeNode(parser.GetSource(), parser.GetLocation(), parser.GetLocation()) sym, err := MakeSymbol(symNode, "quote") if err != nil { 
err.SetNode(&symNode) return nil, err } listElems := []IExpr{ sym, expr, } listNode := MakeNodeFromExprs(listElems) if listNode == nil { n := MakeSinglePointNode(parser.GetSource(), parser.GetLocation()) listNode = &n } listNode.location.DecStart(1) listNode.location.IncStart(1) return MakeList(*listNode, listElems), nil } // readUnquotedExpr reads different unquoting expressions from their short representaions. // ~a => (unquote a) // ~@a => (unquote-splicing a) // Note: `unquote` and `unquote-splicing` are not global functions or special, they are bounded // to quasiquoted experssions only. func readUnquotedExpr(parser IParsable) (IExpr, IError) { c := parser.peek(true) if c == nil { return nil, makeErrorAtPoint(parser, "end of file while reading an unquoted expression") } var sym IExpr var err IError var expr IExpr node := MakeNode(parser.GetSource(), parser.GetLocation(), parser.GetLocation()) if *c == "@" { parser.next(true) sym, err = MakeSymbol(node, "unquote-splicing") if err != nil { err.SetNode(&node) } else { expr, err = readExpr(parser) } } else { sym, err = MakeSymbol(node, "unquote") if err != nil { err.SetNode(&node) } else { expr, err = readExpr(parser) } } if err != nil { return nil, err } listElems := []IExpr{sym, expr} listNode := MakeNodeFromExprs(listElems) // listNode won't be nil in this case but it doesn't // mean we shouldn't check if listNode == nil { n := MakeSinglePointNode(parser.GetSource(), parser.GetLocation()) listNode = &n } listNode.location.DecStart(1) listNode.location.IncStart(1) return MakeList(*listNode, listElems), nil } // readQuasiquotedExpr reads the backquote and replace it with a call // to the `quasiquote` macro. 
func readQuasiquotedExpr(parser IParsable) (IExpr, IError) { expr, err := readExpr(parser) if err != nil { return nil, err } node := MakeNode(parser.GetSource(), parser.GetLocation(), parser.GetLocation()) sym, err := MakeSymbol(node, "quasiquote") if err != nil { err.SetNode(&node) return nil, err } listElems := []IExpr{sym, expr} listNode := MakeNodeFromExprs(listElems) // listNode won't be nil in this case but it doesn't // mean we shouldn't check if listNode == nil { n := MakeSinglePointNode(parser.GetSource(), parser.GetLocation()) listNode = &n } listNode.location.DecStart(1) listNode.location.IncStart(1) return MakeList(*listNode, listElems), nil } // readExpr reads one expression from the input. This function is the most // important function in the parser which dispatches the call to different // reader functions based on the first character func readExpr(parser IParsable) (IExpr, IError) { loop: c := parser.next(true) if c == nil { // We're done reading return nil, nil } if *c == "'" { return readQuotedExpr(parser) } if *c == "~" { return readUnquotedExpr(parser) } if *c == "`" { return readQuasiquotedExpr(parser) } if *c == "(" { return readList(parser) } if *c == ";" { readComment(parser) goto loop } if *c == ":" { return readKeyword(parser) } // if *c == "[" { // readVector(parser) // } // if *c == "{" { // readMap(parser) // } parser.back() return readSymbol(parser) } //ParseToAST is the entry function to the reader/parser which // converts the `input` string to a `Block` of code. A block // by itself is not something available to the language. It's // just anbstraction for a ordered collection of expressions. // It doesn't have anything to do with the concept of blocks // from other programming languages. 
func ParseToAST(ns string, input string) (*Block, IError) { var ast Block parser := StringParser{ buffer: strings.Split(input, ""), pos: 0, source: ns, } for { expr, err := readExpr(&parser) if err != nil { return nil, err } if expr == nil { break } ast.Append(expr) } return &ast, nil }