mirror of
https://github.com/Kugelschieber/asl.git
synced 2026-01-18 12:00:25 +00:00
203 lines
3.5 KiB
Go
203 lines
3.5 KiB
Go
package tokenizer
|
|
|
|
import (
|
|
"strings"
|
|
)
|
|
|
|
// Token is a single lexical unit emitted by the tokenizer.
type Token struct {
	// Token is the raw text of the unit, exactly as it was collected
	// from the source code.
	Token string
}
|
|
|
|
// delimiter lists the single-byte characters that always form a token
// of their own and terminate any token collected before them.
var delimiter = []byte{
	'=', ';',
	'{', '}', '(', ')', '[', ']', '<', '>',
	'!', ',', ':', '&', '|',
	'+', '-', '*', '/', // TODO: modulo?
}
|
|
|
|
// keywords lists the reserved words of the language in lower case.
var keywords = []string{
	"var", "if", "while", "switch", "for", "foreach", "func",
	"true", "false", "case", "default", "return",
	"try", "catch", "exitwith", "waituntil", "code",
}
|
|
|
|
// whitespace holds the bytes treated as blank space: space, line feed,
// tab and carriage return.
var whitespace = []byte(" \n\t\r")
|
|
// identifier holds every byte that may appear in an identifier:
// ASCII letters, decimal digits and the underscore.
var identifier = "abcdefghijklmnopqrstuvwxyz" +
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
	"0123456789" +
	"_"
|
|
|
|
// Tokenizes the given byte array into syntax tokens,
|
|
// which can be parsed later.
|
|
func Tokenize(code []byte) []Token {
|
|
code = removeComments(code)
|
|
tokens := make([]Token, 0)
|
|
token, mask, isstring := "", false, false
|
|
|
|
for i := range code {
|
|
c := code[i]
|
|
|
|
// string masks (backslash)
|
|
if c == '\\' && !mask {
|
|
token += "\\"
|
|
mask = true
|
|
continue
|
|
}
|
|
|
|
// string
|
|
if c == '"' && !mask {
|
|
token += "\""
|
|
isstring = !isstring
|
|
continue
|
|
}
|
|
|
|
if isstring {
|
|
token += string(c)
|
|
} else {
|
|
// delimeter, keyword or variable/expression
|
|
if byteArrayContains(delimiter, c) {
|
|
if token != "" {
|
|
tokens = append(tokens, Token{token})
|
|
}
|
|
|
|
tokens = append(tokens, Token{string(c)})
|
|
token = ""
|
|
} else if stringArrayContains(strings.ToLower(token)) && !isIdentifierCharacter(c) {
|
|
tokens = append(tokens, Token{token})
|
|
token = ""
|
|
} else if !byteArrayContains(whitespace, c) {
|
|
token += string(c)
|
|
}
|
|
}
|
|
|
|
mask = false
|
|
}
|
|
|
|
return tokens
|
|
}
|
|
|
|
// Removes all comments from input byte array.
|
|
// Comments are single line comments, starting with // (two slashes),
|
|
// multi line comments with /* ... */ (slash star, star slash).
|
|
func removeComments(code []byte) []byte {
|
|
newcode := make([]byte, len(code))
|
|
j, mask, isstring := 0, false, false
|
|
|
|
for i := 0; i < len(code); i++ {
|
|
c := code[i]
|
|
|
|
// do not remove comments from strings
|
|
if c == '\\' && !mask {
|
|
mask = true
|
|
}
|
|
|
|
if c == '"' && !mask {
|
|
isstring = !isstring
|
|
}
|
|
|
|
// single/multi line comment
|
|
if !isstring {
|
|
if c == '/' && nextChar(code, i) == '/' {
|
|
i = skipSingleLineComment(code, i+1)
|
|
continue
|
|
} else if c == '/' && nextChar(code, i) == '*' {
|
|
i = skipMultiLineComment(code, i+1)
|
|
continue
|
|
}
|
|
}
|
|
|
|
newcode[j] = c
|
|
j++
|
|
mask = false
|
|
}
|
|
|
|
return newcode[:j]
|
|
}
|
|
|
|
// nextChar returns the byte that follows position i in code.
// If there is no such byte, the ASCII digit '0' (not a NUL byte)
// is returned as a sentinel.
func nextChar(code []byte, i int) byte {
	if next := i + 1; next < len(code) {
		return code[next]
	}

	return '0'
}
|
|
|
|
// skipSingleLineComment advances from position i to the end of the
// current line. It returns the index of the next line feed at or after i,
// or i unchanged when no line feed follows or i is already past the end
// of code (which is len(code) when the scan runs off the end).
func skipSingleLineComment(code []byte, i int) int {
	for ; i < len(code); i++ {
		if code[i] == '\n' {
			break
		}
	}

	return i
}
|
|
|
|
// skipMultiLineComment searches code from position i for the closing
// "*/" of a multi line comment. It returns the index of the closing
// slash. If no terminator is found, the scan stops at len(code) (or at i
// when i is already past the end) and one more than that position is
// returned.
func skipMultiLineComment(code []byte, i int) int {
	for ; i < len(code); i++ {
		if code[i] != '*' {
			continue
		}

		// A star only closes the comment when a slash directly follows.
		if i+1 < len(code) && code[i+1] == '/' {
			break
		}
	}

	return i + 1
}
|
|
|
|
// byteArrayContains reports whether needle occurs anywhere in haystack.
func byteArrayContains(haystack []byte, needle byte) bool {
	for _, b := range haystack {
		if b == needle {
			return true
		}
	}

	return false
}
|
|
|
|
// Checks if a byte array (string) contains a string delimeter.
|
|
func stringArrayContains(needle string) bool {
|
|
for i := range keywords {
|
|
if keywords[i] == needle {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// Checks if a character is allowed for identifiers.
|
|
func isIdentifierCharacter(c byte) bool {
|
|
for i := range identifier {
|
|
if identifier[i] == c {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|