initial commit

milarin 2023-03-19 13:03:20 +01:00
commit a98c7c875f
10 changed files with 393 additions and 0 deletions

.gitignore vendored Normal file

@@ -0,0 +1,2 @@
**/*_test.go
**/*.txt

consts.go Normal file

@@ -0,0 +1,8 @@
package lexer

var (
	Keywords = []string{"pipeline"}
	Separators = []rune{'\n', ' ', '<', '>'}
	StringSeparators = []rune{'\'', '"', '`'}
	Operators = []rune{'='}
)
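
These tables drive the dispatch in Lexer.Next further down in this commit: a rune is classified by membership in Separators, StringSeparators or Operators via slices.Contains. The classify helper below is a minimal illustrative sketch of that lookup, not code from the commit:

package lexer

import "git.milar.in/milarin/slices"

// classify is illustrative only: it mirrors the membership checks Lexer.Next
// performs against the tables above and names the resulting token category.
func classify(rn rune) string {
	switch {
	case slices.Contains(Separators, rn):
		return "separator"
	case slices.Contains(StringSeparators, rn):
		return "string separator"
	case slices.Contains(Operators, rn):
		return "operator"
	default:
		return "word character"
	}
}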

esc_seq.go Normal file

@@ -0,0 +1,25 @@
package lexer

import "strings"

var EscSeqPrintReplacer = strings.NewReplacer(
	`\`, `\\`,
	"\n", `\n`,
	"\t", `\t`,
	"\f", `\f`,
	"\r", `\r`,
	"\v", `\v`,
	"\b", `\b`,
	"\a", `\a`,
)

var EscSeqReplacer = strings.NewReplacer(
	`\\`, `\`,
	`\n`, "\n",
	`\t`, "\t",
	`\f`, "\f",
	`\r`, "\r",
	`\v`, "\v",
	`\b`, "\b",
	`\a`, "\a",
)
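
A minimal sketch of how the two replacers complement each other: EscSeqReplacer decodes written escape sequences into real control characters (used for string literals in the lexer), while EscSeqPrintReplacer renders them back into printable form (used by Token.String). The input string below is made up for illustration:

package main

import (
	"fmt"

	lexer "git.milar.in/slash/lexer"
)

func main() {
	raw := `first\tsecond\n` // escape sequences as written in source text

	decoded := lexer.EscSeqReplacer.Replace(raw)          // real tab and newline characters
	printed := lexer.EscSeqPrintReplacer.Replace(decoded) // back to the printable form

	fmt.Println(printed == raw) // true: the replacers undo each other for these sequences
}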

examples/example.slang Normal file

@@ -0,0 +1,24 @@
#!/usr/bin/env slash
anime-watcher # comment
exit_on_empty # line 6: filter = ""
format -o '$HOME/Anime/{0}.*' # line 10: format string to file
filter -f # line 10: check if file exists
branch == "hi" # branch/split/if/filter multiple stdout fds
cat
:a="hello"> tee out1.txt
-
/home> cat
tee out2.txt
merge ordered # merge multiple stdin fds
echo
# list all go files in pwd
ls *.go # filepath.Glob
echo $HOME # replace env vars; TODO: cd not possible in pipeline
~:A="hi" B="bye"> ls # start ls in $HOME folder
echo -e "123\n321\nasd"
branch '^\w+?$'
echo # output: asd

examples/example2.slang Normal file

@@ -0,0 +1,24 @@
#!/usr/bin/env slash
pipeline trimSpaces { # pipeline definition for future use
format -i '^\s*?(.*?)\s*?$' -o '{1}' # equivalent to functions in conventional programming languages
}
range 0-10 # range command ignores stdin and produces output according to arguments
branch async { # branch built-in command splits input stream based on case operators
# merge operator is optional and defines the merge strategy of output streams (can be one of sync, async | default: sync)
case >= 5: # cases represent a pipeline (can also be filtered by regex?)
~:A="hi" B="bye"> trimSpaces # every command call is first checked for available pipeline (pwd and env variables can be provided if needed)
# pipeline cannot by called 'command' so a command with same name can still be called with `command trimSpaces`
default: # default pipeline is optional (default: empty pipeline)
}
# implementation of fizzbazz
range 1..100
if (v % 3 == 0 && v % 5 == 0) {echo "fizzbazz"}
if (v % 3 == 0) {echo "fizz"} else if (v % 5 == 0) {echo "bazz"}
if (v % 5 == 0) {
echo "bazz"
}

examples/fizzbazz.slang Normal file

@@ -0,0 +1,12 @@
#!/usr/bin/env slashlang
pipeline fizzbazz
if v % 3 == 0 && v % 5 == 0
print "fizzbazz"
if v % 3 == 0
print "fizz" # else if (v % 5 == 0) {print "bazz"}
if v % 5 == 0
<PWD="/home" A="asd"> print "bazz"
range 1..100
fizzbazz

go.mod Normal file

@@ -0,0 +1,14 @@
module git.milar.in/slash/lexer

go 1.20

require (
	git.milar.in/milarin/bufr v0.0.12
	git.milar.in/milarin/slices v0.0.7
)

require (
	git.milar.in/milarin/adverr v1.1.0 // indirect
	git.milar.in/milarin/ds v0.0.2 // indirect
	git.milar.in/milarin/gmath v0.0.3 // indirect
)

go.sum Normal file

@@ -0,0 +1,10 @@
git.milar.in/milarin/adverr v1.1.0 h1:jD9WnOvs40lfMhvqQ7cllOaRJNBMWr1f07/s9jAadp0=
git.milar.in/milarin/adverr v1.1.0/go.mod h1:joU9sBb7ySyNv4SpTXB0Z4o1mjXsArBw4N27wjgzj9E=
git.milar.in/milarin/bufr v0.0.12 h1:BZwLFOdi5hohQuugQceFHwvmz7ZGYwyhdrBcKfZPjGs=
git.milar.in/milarin/bufr v0.0.12/go.mod h1:yIRL89LWUgRlmfuVAwq12YfFs+Hq2Ji4SKEUyqXVTLo=
git.milar.in/milarin/ds v0.0.2 h1:vCA3mDxZUNfvHpzrdz7SeBUKiPn74NTopo915IUG7I0=
git.milar.in/milarin/ds v0.0.2/go.mod h1:HJK7QERcRvV9j7xzEocrKUtW+1q4JB1Ly4Bj54chfwI=
git.milar.in/milarin/gmath v0.0.3 h1:ii6rKNItS55O/wtIFhD1cTN2BMwDZjTBmiOocKURvxM=
git.milar.in/milarin/gmath v0.0.3/go.mod h1:HDLftG5RLpiNGKiIWh+O2G1PYkNzyLDADO8Cd/1abiE=
git.milar.in/milarin/slices v0.0.7 h1:s+e8W+pATa2NrAtniruUoNfjpmlTVQgyKu4ttfkE1cU=
git.milar.in/milarin/slices v0.0.7/go.mod h1:qMhdtMnfWswc1rHpwgNw33lB84aNEkdBn5BDiYA+G3k=

lexer.go Normal file

@@ -0,0 +1,223 @@
package lexer

import (
	"fmt"
	"io"

	"git.milar.in/milarin/bufr"
	"git.milar.in/milarin/slices"
)

type Lexer struct {
	src *bufr.Reader

	Indent string
}

func New(r io.Reader) *Lexer {
	return &Lexer{
		src: bufr.New(r),
	}
}

func (t *Lexer) Pos() bufr.Position {
	index, line, column := t.src.Pos()
	return bufr.Position{Index: index, Line: line, Column: column}
}

func (t *Lexer) Next() (*Token, error) {
	rn, err := t.src.Rune()
	if err != nil {
		return nil, err
	}

	rn2, err := t.src.Rune()
	if err != nil {
		return nil, err
	}

	if err := t.src.UnreadRunes(2); err != nil {
		return nil, err
	}

	if rn == '\t' || (rn == ' ' && rn2 == ' ') {
		return t.parseIndent()
	} else if rn == '#' {
		return t.parseComment()
	} else if slices.Contains(Separators, rn) {
		return t.parseSeparator()
	} else if slices.Contains(StringSeparators, rn) {
		return t.parseStringLiteral()
	} else if slices.Contains(Operators, rn) {
		return t.parseOperator()
	}

	str, err := t.src.PeekStringUntil(bufr.OneOf(" \n"))
	if err != nil {
		return nil, err
	}

	if slices.Contains(Keywords, str) {
		return t.parseKeyword()
	}

	return t.parseWord()
}

func (t *Lexer) parseComment() (*Token, error) {
	start := t.Pos()

	comment, err := t.src.StringUntil(bufr.IsNewLine)
	if err != nil {
		return nil, err
	}

	if err := t.src.UnreadRune(); err != nil {
		return nil, err
	}

	return &Token{
		Type: TokenTypeComment,
		Value: comment,
		Start: start,
		End: t.Pos(),
	}, nil
}

func (t *Lexer) parseKeyword() (*Token, error) {
	start := t.Pos()

	keyword, err := t.src.StringUntil(bufr.IsWhitespace)
	if err != nil {
		return nil, err
	}

	if err := t.src.UnreadRune(); err != nil {
		return nil, err
	}

	return &Token{
		Type: TokenTypeKeyword,
		Value: keyword,
		Start: start,
		End: t.Pos(),
	}, nil
}

func (t *Lexer) parseWord() (*Token, error) {
	start := t.Pos()

	word, err := t.src.StringUntil(bufr.IsWhitespace, bufr.Is('='))
	if err != nil {
		return nil, err
	}

	if err := t.src.UnreadRune(); err != nil {
		return nil, err
	}

	return &Token{
		Type: TokenTypeWord,
		Value: word,
		Start: start,
		End: t.Pos(),
	}, nil
}

func (t *Lexer) parseSeparator() (*Token, error) {
	start := t.Pos()

	rn, err := t.src.Rune()
	if err != nil {
		return nil, err
	}

	return &Token{
		Type: TokenTypeSeparator,
		Value: string(rn),
		Start: start,
		End: t.Pos(),
	}, nil
}

func (t *Lexer) parseOperator() (*Token, error) {
	start := t.Pos()

	rn, err := t.src.Rune()
	if err != nil {
		return nil, err
	}

	return &Token{
		Type: TokenTypeOperator,
		Value: string(rn),
		Start: start,
		End: t.Pos(),
	}, nil
}

func (t *Lexer) parseStringLiteral() (*Token, error) {
	start := t.Pos()

	startRn, err := t.src.Rune()
	if err != nil {
		return nil, err
	}

	literal, err := t.src.StringUntil(bufr.Is(startRn))
	if err != nil {
		return nil, err
	}
	literal = EscSeqReplacer.Replace(literal)

	return &Token{
		Type: TokenTypeWord,
		Value: literal,
		Start: start,
		End: t.Pos(),
	}, nil
}

func (t *Lexer) parseIndent() (*Token, error) {
	start := t.Pos()

	// no indentation set yet
	if t.Indent == "" {
		str, err := t.src.StringWhile(bufr.OneOf("\t "))
		if err != nil {
			return nil, err
		}

		if err := t.src.UnreadRune(); err != nil {
			return nil, err
		}

		//fmt.Printf("indentation set to '%s'\n", EscSeqReplacer.Replace(str))
		t.Indent = str

		return &Token{
			Type: TokenTypeIndent,
			Value: str,
			Start: start,
			End: t.Pos(),
		}, nil
	}

	for _, rn := range t.Indent {
		ok, err := t.src.ExpectRune(bufr.Is(rn))
		if err != nil {
			return nil, err
		}

		if !ok {
			return nil, fmt.Errorf("mixed indentation styles at (%d:%d)", start.Line, start.Column)
		}
	}

	return &Token{
		Type: TokenTypeIndent,
		Value: t.Indent,
		Start: start,
		End: t.Pos(),
	}, nil
}
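
For orientation, a minimal, hypothetical driver for the lexer defined above. It relies only on the API in this commit (New, Next and Token's String method); the sample input and the assumption that end of input surfaces as io.EOF are illustrative, not part of the commit:

package main

import (
	"errors"
	"fmt"
	"io"
	"strings"

	lexer "git.milar.in/slash/lexer"
)

func main() {
	// hypothetical slash source; any io.Reader can be used
	src := strings.NewReader("pipeline trimSpaces\n\tformat -o '{1}'\n")

	lex := lexer.New(src)
	for {
		tok, err := lex.Next()
		if errors.Is(err, io.EOF) { // assumption: the underlying bufr reader reports io.EOF at end of input
			break
		}
		if err != nil {
			fmt.Println("lex error:", err)
			return
		}
		fmt.Println(tok) // Token.String, e.g. "type: keyword | value: 'pipeline' | ..."
	}
}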

token.go Normal file

@@ -0,0 +1,51 @@
package lexer

import (
	"fmt"

	"git.milar.in/milarin/bufr"
)

type Token struct {
	Type TokenType
	Value string
	Start bufr.Position
	End bufr.Position
}

func (t Token) String() string {
	return fmt.Sprintf(
		"type: %s | value: '%s' | start: (%d:%d) | end: (%d:%d)",
		t.Type, EscSeqPrintReplacer.Replace(t.Value), t.Start.Line, t.Start.Column, t.End.Line, t.End.Column,
	)
}

type TokenType uint8

const (
	TokenTypeComment TokenType = iota
	TokenTypeKeyword
	TokenTypeIndent
	TokenTypeSeparator
	TokenTypeWord
	TokenTypeOperator
)

func (tt TokenType) String() string {
	switch tt {
	case TokenTypeComment:
		return "comment"
	case TokenTypeKeyword:
		return "keyword"
	case TokenTypeIndent:
		return "indent"
	case TokenTypeSeparator:
		return "separator"
	case TokenTypeWord:
		return "word"
	case TokenTypeOperator:
		return "operator"
	default:
		panic(fmt.Sprintf("invalid token type: %d", tt))
	}
}