From a98c7c875f4424336d9ba776f24f212fe38679bd Mon Sep 17 00:00:00 2001 From: milarin Date: Sun, 19 Mar 2023 13:03:20 +0100 Subject: [PATCH] initial commit --- .gitignore | 2 + consts.go | 8 ++ esc_seq.go | 25 +++++ examples/example.slang | 24 +++++ examples/example2.slang | 24 +++++ examples/fizzbazz.slang | 12 +++ go.mod | 14 +++ go.sum | 10 ++ lexer.go | 223 ++++++++++++++++++++++++++++++++++++++++ token.go | 51 +++++++++ 10 files changed, 393 insertions(+) create mode 100644 .gitignore create mode 100644 consts.go create mode 100644 esc_seq.go create mode 100644 examples/example.slang create mode 100644 examples/example2.slang create mode 100644 examples/fizzbazz.slang create mode 100644 go.mod create mode 100644 go.sum create mode 100644 lexer.go create mode 100644 token.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4bf6552 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +**/*_test.go +**/*.txt diff --git a/consts.go b/consts.go new file mode 100644 index 0000000..c1fbb5b --- /dev/null +++ b/consts.go @@ -0,0 +1,8 @@ +package lexer + +var ( + Keywords = []string{"pipeline"} + Separators = []rune{'\n', ' ', '<', '>'} + StringSeparators = []rune{'\'', '"', '`'} + Operators = []rune{'='} +) diff --git a/esc_seq.go b/esc_seq.go new file mode 100644 index 0000000..0a8dbce --- /dev/null +++ b/esc_seq.go @@ -0,0 +1,25 @@ +package lexer + +import "strings" + +var EscSeqPrintReplacer = strings.NewReplacer( + `\`, `\\`, + "\n", `\n`, + "\t", `\t`, + "\f", `\f`, + "\r", `\r`, + "\v", `\v`, + "\b", `\b`, + "\a", `\a`, +) + +var EscSeqReplacer = strings.NewReplacer( + `\\`, `\`, + `\n`, "\n", + `\t`, "\t", + `\f`, "\f", + `\r`, "\r", + `\v`, "\v", + `\b`, "\b", + `\a`, "\a", +) diff --git a/examples/example.slang b/examples/example.slang new file mode 100644 index 0000000..88dd345 --- /dev/null +++ b/examples/example.slang @@ -0,0 +1,24 @@ +#!/usr/bin/env slash + +anime-watcher # comment +exit_on_empty # line 6: filter = "" +format -o 
'$HOME/Anime/{0}.*' # line 10: format string to file +filter -f # line 10: check if file exists +branch == "hi" # branch/split/if/filter multiple stdout fds + cat + :a="hello"> tee out1.txt + - + /home> cat + tee out2.txt +merge ordered # merge multiple stdin fds +echo + +# list all go files in pwd +ls *.go # filepath.Glob + +echo $HOME # replace env vars TODO cd not possible in pipeline +~:A="hi" B="bye"> ls # start ls in $HOME folder + +echo -e "123\n321\nasd" +branch '^\w+?$' + echo # output: asd diff --git a/examples/example2.slang b/examples/example2.slang new file mode 100644 index 0000000..5859de0 --- /dev/null +++ b/examples/example2.slang @@ -0,0 +1,24 @@ +#!/usr/bin/env slash + +pipeline trimSpaces { # pipeline definition for future use + format -i '^\s*?(.*?)\s*?$' -o '{1}' # equivalent to functions in conventional programming languages +} + +range 0-10 # range command ignores stdin and produces output according to arguments +branch async { # branch built-in command splits input stream based on case operators + # merge operator is optional and defines the merge strategy of output streams (can be one of sync,async | default: sync) + + case >= 5: # cases represent a pipeline (can also be filtered by regex?) 
+ ~:A="hi" B="bye"> trimSpaces # every command call is first checked for available pipeline (pwd and env variables can be provided if needed) + # pipeline cannot be called 'command' so a command with same name can still be called with `command trimSpaces` + + default: # default pipeline is optional (default: empty pipeline) +} + +# implementation of fizzbazz +range 1..100 +if (v % 3 == 0 && v % 5 == 0) {echo "fizzbazz"} +if (v % 3 == 0) {echo "fizz"} else if (v % 5 == 0) {echo "bazz"} +if (v % 5 == 0) { + echo "bazz" +} diff --git a/examples/fizzbazz.slang b/examples/fizzbazz.slang new file mode 100644 index 0000000..202d658 --- /dev/null +++ b/examples/fizzbazz.slang @@ -0,0 +1,12 @@ +#!/usr/bin/env slashlang + +pipeline fizzbazz + if v % 3 == 0 && v % 5 == 0 + print "fizzbazz" + if v % 3 == 0 + print "fizz" # else if (v % 5 == 0) {print "bazz"} + if v % 5 == 0 + print "bazz" + +range 1..100 +fizzbazz diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..66dcff2 --- /dev/null +++ b/go.mod @@ -0,0 +1,14 @@ +module git.milar.in/slash/lexer + +go 1.20 + +require ( + git.milar.in/milarin/bufr v0.0.12 + git.milar.in/milarin/slices v0.0.7 +) + +require ( + git.milar.in/milarin/adverr v1.1.0 // indirect + git.milar.in/milarin/ds v0.0.2 // indirect + git.milar.in/milarin/gmath v0.0.3 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..03d9f1d --- /dev/null +++ b/go.sum @@ -0,0 +1,10 @@ +git.milar.in/milarin/adverr v1.1.0 h1:jD9WnOvs40lfMhvqQ7cllOaRJNBMWr1f07/s9jAadp0= +git.milar.in/milarin/adverr v1.1.0/go.mod h1:joU9sBb7ySyNv4SpTXB0Z4o1mjXsArBw4N27wjgzj9E= +git.milar.in/milarin/bufr v0.0.12 h1:BZwLFOdi5hohQuugQceFHwvmz7ZGYwyhdrBcKfZPjGs= +git.milar.in/milarin/bufr v0.0.12/go.mod h1:yIRL89LWUgRlmfuVAwq12YfFs+Hq2Ji4SKEUyqXVTLo= +git.milar.in/milarin/ds v0.0.2 h1:vCA3mDxZUNfvHpzrdz7SeBUKiPn74NTopo915IUG7I0= +git.milar.in/milarin/ds v0.0.2/go.mod h1:HJK7QERcRvV9j7xzEocrKUtW+1q4JB1Ly4Bj54chfwI= +git.milar.in/milarin/gmath v0.0.3 
h1:ii6rKNItS55O/wtIFhD1cTN2BMwDZjTBmiOocKURvxM= +git.milar.in/milarin/gmath v0.0.3/go.mod h1:HDLftG5RLpiNGKiIWh+O2G1PYkNzyLDADO8Cd/1abiE= +git.milar.in/milarin/slices v0.0.7 h1:s+e8W+pATa2NrAtniruUoNfjpmlTVQgyKu4ttfkE1cU= +git.milar.in/milarin/slices v0.0.7/go.mod h1:qMhdtMnfWswc1rHpwgNw33lB84aNEkdBn5BDiYA+G3k= diff --git a/lexer.go b/lexer.go new file mode 100644 index 0000000..6f8b73d --- /dev/null +++ b/lexer.go @@ -0,0 +1,223 @@ +package lexer + +import ( + "fmt" + "io" + + "git.milar.in/milarin/bufr" + "git.milar.in/milarin/slices" +) + +type Lexer struct { + src *bufr.Reader + Indent string +} + +func New(r io.Reader) *Lexer { + return &Lexer{ + src: bufr.New(r), + } +} + +func (t *Lexer) Pos() bufr.Position { + index, line, column := t.src.Pos() + return bufr.Position{Index: index, Line: line, Column: column} +} + +func (t *Lexer) Next() (*Token, error) { + rn, err := t.src.Rune() + if err != nil { + return nil, err + } + + rn2, err := t.src.Rune() + if err != nil { + return nil, err + } + + if err := t.src.UnreadRunes(2); err != nil { + return nil, err + } + + if rn == '\t' || (rn == ' ' && rn2 == ' ') { + return t.parseIndent() + } else if rn == '#' { + return t.parseComment() + } else if slices.Contains(Separators, rn) { + return t.parseSeparator() + } else if slices.Contains(StringSeparators, rn) { + return t.parseStringLiteral() + } else if slices.Contains(Operators, rn) { + return t.parseOperator() + } + + str, err := t.src.PeekStringUntil(bufr.OneOf(" \n")) + if err != nil { + return nil, err + } + + if slices.Contains(Keywords, str) { + return t.parseKeyword() + } + + return t.parseWord() +} + +func (t *Lexer) parseComment() (*Token, error) { + start := t.Pos() + + comment, err := t.src.StringUntil(bufr.IsNewLine) + if err != nil { + return nil, err + } + + if err := t.src.UnreadRune(); err != nil { + return nil, err + } + + return &Token{ + Type: TokenTypeComment, + Value: comment, + Start: start, + End: t.Pos(), + }, nil +} + +func (t *Lexer) 
parseKeyword() (*Token, error) { + start := t.Pos() + + keyword, err := t.src.StringUntil(bufr.IsWhitespace) + if err != nil { + return nil, err + } + + if err := t.src.UnreadRune(); err != nil { + return nil, err + } + + return &Token{ + Type: TokenTypeKeyword, + Value: keyword, + Start: start, + End: t.Pos(), + }, nil +} + +func (t *Lexer) parseWord() (*Token, error) { + start := t.Pos() + + word, err := t.src.StringUntil(bufr.IsWhitespace, bufr.Is('=')) + if err != nil { + return nil, err + } + + if err := t.src.UnreadRune(); err != nil { + return nil, err + } + + return &Token{ + Type: TokenTypeWord, + Value: word, + Start: start, + End: t.Pos(), + }, nil +} + +func (t *Lexer) parseSeparator() (*Token, error) { + start := t.Pos() + + rn, err := t.src.Rune() + if err != nil { + return nil, err + } + + return &Token{ + Type: TokenTypeSeparator, + Value: string(rn), + Start: start, + End: t.Pos(), + }, nil +} + +func (t *Lexer) parseOperator() (*Token, error) { + start := t.Pos() + + rn, err := t.src.Rune() + if err != nil { + return nil, err + } + + return &Token{ + Type: TokenTypeOperator, + Value: string(rn), + Start: start, + End: t.Pos(), + }, nil +} + +func (t *Lexer) parseStringLiteral() (*Token, error) { + start := t.Pos() + + startRn, err := t.src.Rune() + if err != nil { + return nil, err + } + + literal, err := t.src.StringUntil(bufr.Is(startRn)) + if err != nil { + return nil, err + } + + literal = EscSeqReplacer.Replace(literal) + + return &Token{ + Type: TokenTypeWord, + Value: literal, + Start: start, + End: t.Pos(), + }, nil +} + +func (t *Lexer) parseIndent() (*Token, error) { + start := t.Pos() + + // no indentation set yet + if t.Indent == "" { + str, err := t.src.StringWhile(bufr.OneOf("\t ")) + if err != nil { + return nil, err + } + + if err := t.src.UnreadRune(); err != nil { + return nil, err + } + + //fmt.Printf("indentation set to '%s'\n", EscSeqReplacer.Replace(str)) + t.Indent = str + + return &Token{ + Type: TokenTypeIndent, + Value: 
str, + Start: start, + End: t.Pos(), + }, nil + } + + for _, rn := range t.Indent { + ok, err := t.src.ExpectRune(bufr.Is(rn)) + if err != nil { + return nil, err + } + + if !ok { + return nil, fmt.Errorf("mixed indentation styles at (%d:%d)", start.Line, start.Column) + } + } + + return &Token{ + Type: TokenTypeIndent, + Value: t.Indent, + Start: start, + End: t.Pos(), + }, nil +} diff --git a/token.go b/token.go new file mode 100644 index 0000000..3ed7c71 --- /dev/null +++ b/token.go @@ -0,0 +1,51 @@ +package lexer + +import ( + "fmt" + + "git.milar.in/milarin/bufr" +) + +type Token struct { + Type TokenType + Value string + Start bufr.Position + End bufr.Position +} + +func (t Token) String() string { + return fmt.Sprintf( + "type: %s | value: '%s' | start: (%d:%d) | end: (%d:%d)", + t.Type, EscSeqPrintReplacer.Replace(t.Value), t.Start.Line, t.Start.Column, t.End.Line, t.End.Column, + ) +} + +type TokenType uint8 + +const ( + TokenTypeComment TokenType = iota + TokenTypeKeyword + TokenTypeIndent + TokenTypeSeparator + TokenTypeWord + TokenTypeOperator +) + +func (tt TokenType) String() string { + switch tt { + case TokenTypeComment: + return "comment" + case TokenTypeKeyword: + return "keyword" + case TokenTypeIndent: + return "indent" + case TokenTypeSeparator: + return "separator" + case TokenTypeWord: + return "word" + case TokenTypeOperator: + return "operator" + default: + panic(fmt.Sprintf("invalid token type: %d", tt)) + } +}