Create a basic parser in Golang

This commit is contained in:
Sameer Rahmani 2020-11-14 10:51:25 +00:00
parent 7f1080964a
commit 1bd3a41fb6
16 changed files with 630 additions and 43 deletions

View File

@ -1,14 +1,5 @@
# Directory (with trailing slash) of the first makefile listed in MAKEFILE_LIST.
THIS_DIR=$(dir $(realpath $(firstword $(MAKEFILE_LIST))))
# Build the serene-antlr:latest Docker image from bootstrap/grammar/.
.PHONY: build-antlr-image
build-antlr-image:
cd $(PWD)/bootstrap/grammar/ && docker build -t serene-antlr:latest .
# Run the serene-antlr image as the current user, with $(PWD) mounted at
# /serene, to generate Go parser sources from Serene.g4 into bootstrap/pkg/parser/.
.PHONY: gen-parser-go
gen-parser-go:
docker run -it --rm --user $(shell id -u):$(shell id -g) -v $(PWD):/serene serene-antlr:latest -Dlanguage=Go -o /serene/bootstrap/pkg/parser/ /serene/bootstrap/grammar/Serene.g4
# Check formatting by running `cargo fmt -- --check` inside bootstrap/.
.PHONY: lint
lint:
cd $(THIS_DIR)/bootstrap && cargo fmt -- --check

View File

@ -22,6 +22,8 @@ import (
"os"
"github.com/spf13/cobra"
"serene-lang.org/bootstrap/pkg/parser"
"serene-lang.org/bootstrap/pkg/reader"
)
var cfgFile string
@ -41,7 +43,9 @@ to redistribute it under certain conditions;
for details take a look at the LICENSE file.
`,
Run: func(cmd *cobra.Command, args []string) {
fmt.Println("BOOOOOOOOOOOOO")
reader.ReadString("sameer mary")
ast, _ := parser.ParseToAST("(asd mary)")
fmt.Printf("%s\n", ast.String())
},
}

View File

@ -3,7 +3,6 @@ module serene-lang.org/bootstrap
go 1.15
require (
github.com/mitchellh/go-homedir v1.1.0
github.com/spf13/cobra v1.1.1
github.com/spf13/viper v1.7.1
)

View File

@ -16,6 +16,8 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/antlr/antlr4 v0.0.0-20201029161626-9a95f0cc3d7c h1:j/C2kxPfyE0d87/ggAjIsCV5Cdkqmjb+O0W8W+1J+IY=
github.com/antlr/antlr4 v0.0.0-20201029161626-9a95f0cc3d7c/go.mod h1:T7PbCXFs94rrTttyxjbyT5+/1V8T2TYDejxUfHJjw1Y=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=

View File

@ -1,5 +0,0 @@
# Image that wraps the ANTLR 4.8 tool jar on an OpenJDK 16 Alpine base.
FROM openjdk:16-jdk-alpine3.12
WORKDIR /
# Download the complete ANTLR 4.8 jar into the working directory (/).
RUN wget https://www.antlr.org/download/antlr-4.8-complete.jar
# Arguments passed to `docker run` are appended to this java invocation.
ENTRYPOINT ["java", "-jar", "/antlr-4.8-complete.jar"]

View File

@ -1,27 +0,0 @@
// ANTLR grammar named Serene.
grammar Serene;
/*------------------------------------------------------------------
* PARSER RULES
*------------------------------------------------------------------*/
// A program is zero or more expressions followed by end of input.
program : expression* EOF ;
// An expression is a parenthesised form: a leading SYMBOL followed by any
// number of symbols, strings, numbers or nested expressions.
expression: OP SYMBOL (SYMBOL | STRING | NUMBER | expression)* CP;
/*------------------------------------------------------------------
* LEXER RULES
*------------------------------------------------------------------*/
// A symbol starts with a letter and continues with letters or digits.
SYMBOL : (LETTER (LETTER | DIGIT)*) ;
// Opening and closing parentheses.
OP : '(';
CP : ')';
// A string is one or more letters or digits between double quotes.
STRING : '"' (LETTER | DIGIT)+ '"';
// A number is one or more digits.
NUMBER : (DIGIT)+ ;
// Spaces, carriage returns, newlines and tabs go to the hidden channel.
WHITESPACE : [ \r\n\t] + -> channel (HIDDEN);
// Character classes used by the rules above.
DIGIT : '0'..'9';
LETTER : LOWER | UPPER ;
LOWER : ('a'..'z') ;
UPPER : ('A'..'Z') ;

37
bootstrap/pkg/ast/ast.go Normal file
View File

@ -0,0 +1,37 @@
/*
Serene --- Yet an other Lisp
Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Package ast provides the functionality and data structures around the
// Serene's AST.
package ast
// NodeType identifies the kind of an AST node.
type NodeType int
// The node kinds declared so far. iota numbers them sequentially, starting
// with Nil at 0.
const (
Nil NodeType = iota
Symbol
List
)
// ILocatable is implemented by values that can report a location as an int.
// NOTE(review): presumably a position in the parsed input — confirm.
type ILocatable interface {
GetLocation() int
}
// ITypable is implemented by values that can report their NodeType.
type ITypable interface {
GetType() NodeType
}

View File

@ -0,0 +1,26 @@
/*
Serene --- Yet an other Lisp
Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package parser
// IParsable is the character source that the reader functions of this
// package pull their input from. StringParser is the implementation in
// this package.
type IParsable interface {
// next returns a pointer to the next unit of input and advances past it.
// StringParser returns nil once the input is exhausted and, when
// skipWhitespace is true, first skips the whitespace it recognises.
next(skipWhitespace bool) *string
// peek returns a pointer to the upcoming unit of input without consuming
// it. StringParser returns nil at the end of input; with skipWhitespace
// set it does advance past leading whitespace before looking.
peek(skipWhitespace bool) *string
// back moves the read position one step backwards.
back()
// GetLocation reports the current read position.
GetLocation() int
}

View File

@ -0,0 +1,301 @@
/*
Serene --- Yet an other Lisp
Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Package parser provides necessary functions to generate an AST
// from an input
package parser
import (
"errors"
"fmt"
"strings"
"unicode"
"serene-lang.org/bootstrap/pkg/types"
)
// validChars lists the punctuation characters that isValidForSymbol accepts
// in a symbol, in addition to letters and digits.
var validChars = []rune{'!', '$', '%', '&', '*', '+', '-', '.', '~', '/', ':', '<', '=', '>', '?', '@', '^', '_'}
// StringParser is an IParsable implementation that walks an in-memory
// input which has already been split into single-character strings.
type StringParser struct {
	// buffer holds the input, one character per element.
	buffer []string
	// pos is the index in buffer of the next element to read.
	pos int
}

// Implementing IParsable for StringParser ---

// skipBlanks advances pos past any run of whitespace (space, tab, newline
// or carriage return). It is iterative so that a long run of blanks does not
// cost one stack frame per character.
func (sp *StringParser) skipBlanks() {
	for sp.pos < len(sp.buffer) {
		switch sp.buffer[sp.pos] {
		case " ", "\t", "\n", "\r":
			sp.pos++
		default:
			return
		}
	}
}

// next returns a pointer to a copy of the next character and advances past
// it, or returns nil when the input is exhausted. When skipWhitespace is
// true, leading whitespace is consumed first.
func (sp *StringParser) next(skipWhitespace bool) *string {
	if skipWhitespace {
		sp.skipBlanks()
	}
	if sp.pos >= len(sp.buffer) {
		return nil
	}

	char := sp.buffer[sp.pos]
	sp.pos++
	return &char
}

// peek returns a pointer to a copy of the next character without consuming
// it, or nil when the input is exhausted. When skipWhitespace is true,
// leading whitespace IS consumed before looking, so the position may move.
func (sp *StringParser) peek(skipWhitespace bool) *string {
	if skipWhitespace {
		sp.skipBlanks()
	}
	if sp.pos >= len(sp.buffer) {
		return nil
	}

	c := sp.buffer[sp.pos]
	return &c
}

// back moves the read position one character backwards. It is a no-op at
// the beginning of the input.
func (sp *StringParser) back() {
	if sp.pos > 0 {
		sp.pos--
	}
}

// GetLocation returns the index of the next character to be read.
func (sp *StringParser) GetLocation() int {
	return sp.pos
}

// END: IParsable ---
// contains reports whether the rune c occurs anywhere in s.
func contains(s []rune, c rune) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return true
		}
	}
	return false
}
// isValidForSymbol reports whether the first character of char may appear in
// a symbol: a letter, a digit, or one of the punctuation runes in validChars.
//
// The string is decoded as UTF-8 so that a multi-byte character is judged by
// its code point and not by its first byte. An empty string is never valid.
func isValidForSymbol(char string) bool {
	runes := []rune(char)
	if len(runes) == 0 {
		return false
	}

	c := runes[0]
	return contains(validChars, c) || unicode.IsLetter(c) || unicode.IsDigit(c)
}
// readRawSymbol reads a run of symbol characters from parser and returns it
// as a symbol.
//
// It returns an error if the input is exhausted or if the next character
// cannot start a symbol. Reading stops at the end of input or at the first
// character that is not valid in a symbol; that character is left unread.
func readRawSymbol(parser IParsable) (types.IExpr, error) {
	c := parser.peek(false)
	if c == nil {
		return nil, errors.New("Unexpected EOF while parsing a symbol")
	}

	if !isValidForSymbol(*c) {
		// GetLocation returns an int, so it must be formatted with %d.
		return nil, fmt.Errorf("Unexpected character: got '%s', expected a symbol at %d",
			*c,
			parser.GetLocation(),
		)
	}

	// The peeked character has not been consumed, so the loop below reads it
	// again as its first character. Seeding the symbol with it as well would
	// duplicate the first character.
	var symbol strings.Builder
	for {
		c := parser.next(false)
		if c == nil {
			break
		}

		if !isValidForSymbol(*c) {
			// Leave the terminating character for the caller.
			parser.back()
			break
		}
		symbol.WriteString(*c)
	}

	// TODO: Add support for ns qualified symbols
	return types.MakeSymbol(symbol.String()), nil
}
// readSymbol reads one atom from parser. For now every atom is handed to
// readRawSymbol; the commented-out sketch below shows the intended dispatch
// to strings and numbers.
//
// It returns an error when the input is already exhausted.
func readSymbol(parser IParsable) (types.IExpr, error) {
	if parser.peek(false) == nil {
		return nil, errors.New("Unexpected end of file while scanning a symbol")
	}

	// if c == "\"" {
	// 	return readString(parser)
	// }

	// if unicode.IsDigit(c) {
	// 	readNumber(parser, false)
	// }

	// if c == "-" {
	// 	parser.next(true)
	// 	c := parser.peek(false)
	// 	if unicode.IsDigit(c) {
	// 		return readNumber(parser, true)
	// 	} else {
	// 		// Unread '-'
	// 		parser.back()
	// 		return readRawSymbol(parser)
	// 	}
	// }
	return readRawSymbol(parser)
}
// readList reads expressions until the closing ")" and returns them as a
// list. The opening "(" has already been consumed by the caller.
//
// It returns an error if the input ends before the list is closed, or if
// reading any element fails.
func readList(parser IParsable) (types.IExpr, error) {
	elements := make([]types.IExpr, 0)

	for {
		upcoming := parser.peek(true)

		switch {
		case upcoming == nil:
			return nil, errors.New("reaching the end of file while reading a list")
		case *upcoming == ")":
			// Consume the closing parenthesis and finish.
			parser.next(true)
			return types.MakeList(elements), nil
		}

		element, err := readExpr(parser)
		if err != nil {
			return nil, err
		}
		elements = append(elements, element)
	}
}
// readComment discards input up to and including the next newline, or up to
// the end of input. It always returns (nil, nil).
func readComment(parser IParsable) (types.IExpr, error) {
	for c := parser.next(false); c != nil && *c != "\n"; c = parser.next(false) {
		// Every character of the comment is dropped.
	}
	return nil, nil
}
// readQuotedExpr reads the expression that follows a quote character and
// returns it wrapped as the list (quote <expr>).
//
// It returns an error if reading the expression fails or if the input ends
// right after the quote character.
func readQuotedExpr(parser IParsable) (types.IExpr, error) {
	expr, err := readExpr(parser)
	if err != nil {
		return nil, err
	}

	// readExpr signals end of input with (nil, nil). Wrapping that nil would
	// produce a list that panics as soon as it is printed.
	if expr == nil {
		return nil, errors.New("end of file while reading a quoted expression")
	}

	return types.MakeList([]types.IExpr{
		types.MakeSymbol("quote"),
		expr,
	}), nil
}
// readUnquotedExpr reads the expression that follows a "~" and returns it
// wrapped as (unquote <expr>), or as (unquote-splicing <expr>) when the "~"
// is immediately followed by "@".
//
// It returns an error if the input ends before an expression is found or if
// reading the expression fails.
func readUnquotedExpr(parser IParsable) (types.IExpr, error) {
	c := parser.peek(true)
	if c == nil {
		return nil, errors.New("end of file while reading an unquoted expression")
	}

	name := "unquote"
	if *c == "@" {
		// Consume the "@"; otherwise it would be read as the first character
		// of the expression that follows.
		parser.next(true)
		name = "unquote-splicing"
	}

	expr, err := readExpr(parser)
	if err != nil {
		return nil, err
	}

	// readExpr signals end of input with (nil, nil); do not wrap a nil.
	if expr == nil {
		return nil, errors.New("end of file while reading an unquoted expression")
	}

	return types.MakeList([]types.IExpr{types.MakeSymbol(name), expr}), nil
}
// readQuasiquotedExpr reads the expression that follows a backquote and
// returns it wrapped as the list (quasiquote <expr>).
//
// It returns an error if reading the expression fails or if the input ends
// right after the backquote.
func readQuasiquotedExpr(parser IParsable) (types.IExpr, error) {
	expr, err := readExpr(parser)
	if err != nil {
		return nil, err
	}

	// readExpr signals end of input with (nil, nil). Wrapping that nil would
	// produce a list that panics as soon as it is printed.
	if expr == nil {
		return nil, errors.New("end of file while reading a quasiquoted expression")
	}

	return types.MakeList([]types.IExpr{
		types.MakeSymbol("quasiquote"),
		expr,
	}), nil
}
// readExpr reads the next expression from parser, dispatching on its first
// non-whitespace character: quote, unquote, quasiquote, list, or (after
// putting the character back) a symbol. Comments are skipped.
//
// It returns (nil, nil) when the input is exhausted.
func readExpr(parser IParsable) (types.IExpr, error) {
	for {
		c := parser.next(true)
		if c == nil {
			// We're done reading
			return nil, nil
		}

		switch *c {
		case "'":
			return readQuotedExpr(parser)
		case "~":
			return readUnquotedExpr(parser)
		case "`":
			return readQuasiquotedExpr(parser)
		case "(":
			return readList(parser)
		case ";":
			// Drop the comment and look for the next expression.
			readComment(parser)
			continue
		}

		// case '[':
		// 	readVector(parser)

		// case '{':
		// 	readMap(parser)

		// Not a reader macro: un-read the character and parse a symbol.
		parser.back()
		return readSymbol(parser)
	}
}
// ParseToAST parses input and returns the top-level expressions it contains,
// in order, as an ASTree. Parsing stops at the first error, in which case a
// nil tree and that error are returned.
func ParseToAST(input string) (types.ASTree, error) {
	// The parser works on a slice of single-character strings.
	source := &StringParser{buffer: strings.Split(input, "")}

	var tree types.ASTree
	for {
		expr, err := readExpr(source)
		switch {
		case err != nil:
			return nil, err
		case expr == nil:
			// readExpr reports the end of input with a nil expression.
			return tree, nil
		}
		tree = append(tree, expr)
	}
}

View File

@ -0,0 +1,21 @@
/*
Serene --- Yet an other Lisp
Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package reader
// ReadString is an empty stub: it accepts input and does nothing with it.
func ReadString(input string) {
}

View File

@ -0,0 +1,57 @@
/*
Serene --- Yet an other Lisp
Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package types
import (
"fmt"
"strings"
"serene-lang.org/bootstrap/pkg/ast"
)
// List is the AST node for a list form: an ordered sequence of expressions.
type List struct {
	Node
	exprs []IExpr
}

// Eval currently ignores the list contents and returns a pointer to Nil.
func (l List) Eval() IExpr {
	return &Nil
}

// GetType reports ast.List.
func (l List) GetType() ast.NodeType {
	return ast.List
}

// String renders the list as its elements' String() forms, separated by
// single spaces and wrapped in parentheses.
func (l List) String() string {
	parts := make([]string, len(l.exprs))
	for i := range l.exprs {
		parts[i] = l.exprs[i].String()
	}
	return "(" + strings.Join(parts, " ") + ")"
}

// ToDebugStr returns the Go-syntax (%#v) representation of the list.
func (l List) ToDebugStr() string {
	return fmt.Sprintf("%#v", l)
}

// MakeList returns a new List holding elements. The slice is stored as
// given, not copied.
func MakeList(elements []IExpr) *List {
	list := new(List)
	list.exprs = elements
	return list
}

View File

@ -0,0 +1,45 @@
/*
Serene --- Yet an other Lisp
Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package types
import "serene-lang.org/bootstrap/pkg/ast"
// NilType is the type of the Nil value. It carries no data.
type NilType struct{}
// Nil is the package-level NilType value; the Eval methods in this package
// return its address.
var Nil = NilType{}
// Eval returns a pointer to Nil.
func (n NilType) Eval() IExpr {
return &Nil
}
// GetType reports ast.Nil.
func (n NilType) GetType() ast.NodeType {
return ast.Nil
}
// GetLocation always returns 0.
func (n NilType) GetLocation() int {
return 0
}
// String returns "nil".
func (n NilType) String() string {
return "nil"
}
// ToDebugStr returns "nil".
func (n NilType) ToDebugStr() string {
return "nil"
}

View File

@ -0,0 +1,22 @@
/*
Serene --- Yet an other Lisp
Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package types
type ISeq interface {
}

View File

@ -0,0 +1,49 @@
/*
Serene --- Yet an other Lisp
Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package types
import "serene-lang.org/bootstrap/pkg/ast"
// Symbol is the AST node for a symbol; name holds its text.
type Symbol struct {
Node
name string
}
// Eval currently ignores the symbol and returns a pointer to Nil.
func (s *Symbol) Eval() IExpr {
return &Nil
}
// GetType reports ast.Symbol.
func (s *Symbol) GetType() ast.NodeType {
return ast.Symbol
}
// String returns the symbol's name.
func (s *Symbol) String() string {
// TODO: Handle ns qualified symbols here
return s.name
}
// ToDebugStr returns the symbol's name.
func (s *Symbol) ToDebugStr() string {
return s.name
}
// MakeSymbol returns a new Symbol whose name is s.
func MakeSymbol(s string) *Symbol {
return &Symbol{
name: s,
}
}

View File

@ -0,0 +1,65 @@
/*
Serene --- Yet an other Lisp
Copyright (c) 2020 Sameer Rahmani <lxsameer@gnu.org>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Package types provides the type interface of Serene. All the types
// in Serene are directly AST Nodes as well.
package types
import (
"fmt"
"strings"
"serene-lang.org/bootstrap/pkg/ast"
)
// IPrintable is implemented by values that can render themselves as a
// string; its method set is exactly that of fmt.Stringer.
type IPrintable interface {
fmt.Stringer
}
// IDebuggable is implemented by values that can produce a string
// representation of themselves for debugging.
type IDebuggable interface {
ToDebugStr() string
}
// IExpr is the interface every expression satisfies: it has a location and
// a node type, can be printed and debug-printed, and can be evaluated to
// another expression.
type IExpr interface {
ast.ILocatable
ast.ITypable
IPrintable
IDebuggable
Eval() IExpr
}
// Node stores a location and provides GetLocation; List and Symbol embed it.
// NOTE(review): nothing in the visible code ever sets location, so it keeps
// its zero value — confirm whether the parser is meant to fill it in.
type Node struct {
location int
}
// GetLocation returns the stored location.
func (n Node) GetLocation() int {
return n.location
}
// ASTree is an ordered sequence of top-level expressions.
type ASTree []IExpr

// String renders the tree as "AST[", each node's String() form and "]",
// all joined by single spaces.
func (t ASTree) String() string {
	parts := make([]string, 0, len(t)+2)
	parts = append(parts, "AST[")
	for i := range t {
		parts = append(parts, t[i].String())
	}
	parts = append(parts, "]")
	return strings.Join(parts, " ")
}