From 276dd2c1e9812e2ce15ec433f480df8f43945167 Mon Sep 17 00:00:00 2001
From: Marvin Blum
Date: Sun, 25 Oct 2015 13:03:51 +0100
Subject: [PATCH] Fixed tokenizer splitting identifiers like "format" into "for" and "mat".

---
 ToDo.md                       |  4 ++--
 in/complex.asl                |  3 +--
 src/asl/parser.go             |  4 ++--
 src/asl/parser_test.go        |  4 ++--
 src/asl/tokenizer.go          | 18 +++++++++++++++---
 src/asl/tokenizer_test.go     | 14 +++++++++++---
 test/tokenizer_each.asl       |  3 ---
 test/tokenizer_foreach.asl    |  3 +++
 test/tokenizer_identifier.asl |  1 +
 9 files changed, 37 insertions(+), 17 deletions(-)
 delete mode 100644 test/tokenizer_each.asl
 create mode 100644 test/tokenizer_foreach.asl
 create mode 100644 test/tokenizer_identifier.asl

diff --git a/ToDo.md b/ToDo.md
index 01f9c72..a8e5844 100644
--- a/ToDo.md
+++ b/ToDo.md
@@ -2,7 +2,7 @@
 
 * ~~assign to returned values~~
 * special cases (like if ... exitWith, waitUntil {...})
-* sqf: ... sqf whitespace
+* ~~sqf: ... sqf whitespace~~
 * ~~solution for build in commands which do not require left values~~
 * ~~pretty/minified printing~~
 * ~~usage~~
@@ -10,4 +10,4 @@
 * concurrent compiling
 * ~~inline buildin function call -> foo(a)(bar(x)(y));~~
 * ~~negative values e.g. -1, operator !~~
-* tokenizer splits commands like "format" -> for, mat
+* ~~tokenizer splits commands like "format" -> for, mat~~
diff --git a/in/complex.asl b/in/complex.asl
index 2e09f69..2fe4670 100644
--- a/in/complex.asl
+++ b/in/complex.asl
@@ -1,3 +1,2 @@
 //diag_log format ["easyHC: found headless client with ID %1.", easyHCpresent];
-//diag_log () (format(xy)("asdf", "hjkl"));
-var floating = 1.23;
+diag_log () (format(xy)("asdf", "hjkl"));
diff --git a/src/asl/parser.go b/src/asl/parser.go
index 89af9ec..93b7966 100644
--- a/src/asl/parser.go
+++ b/src/asl/parser.go
@@ -29,7 +29,7 @@ func parseBlock() {
 		parseSwitch()
 	} else if accept("for") {
 		parseFor()
-	} else if accept("each") {
+	} else if accept("foreach") {
 		parseForeach()
 	} else if accept("func") {
 		parseFunction()
@@ -182,7 +182,7 @@ func parseFor() {
 }
 
 func parseForeach() {
-	expect("each")
+	expect("foreach")
 	expr := parseExpression(false)
 	expect("{")
 	appendOut("{", true)
diff --git a/src/asl/parser_test.go b/src/asl/parser_test.go
index a725bd5..7317232 100644
--- a/src/asl/parser_test.go
+++ b/src/asl/parser_test.go
@@ -41,8 +41,8 @@ func TestParserFor(t *testing.T) {
 	equal(t, got, want)
 }
 
-func TestParserEach(t *testing.T) {
-	got := getCompiled(t, "test/tokenizer_each.asl")
+func TestParserForeach(t *testing.T) {
+	got := getCompiled(t, "test/tokenizer_foreach.asl")
 	want := "{\n} forEach (allUnits);\n"
 
 	equal(t, got, want)
diff --git a/src/asl/tokenizer.go b/src/asl/tokenizer.go
index 182a24b..8f31154 100644
--- a/src/asl/tokenizer.go
+++ b/src/asl/tokenizer.go
@@ -35,7 +35,7 @@ var keywords = []string{
 	"while",
 	"switch",
 	"for",
-	"each",
+	"foreach",
 	"func",
 	"true",
 	"false",
@@ -44,6 +44,7 @@ var keywords = []string{
 	"return"}
 
 var whitespace = []byte{' ', '\n', '\t'}
+var identifier = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
 
 // Tokenizes the given byte array into syntax tokens,
 // which can be parsed later.
@@ -51,7 +52,7 @@ func Tokenize(code []byte) []Token {
 	code = removeComments(code)
 	tokens := make([]Token, 0)
 	token, mask, isstring := "", false, false
-	
+
 	for i := range code {
 		c := code[i]
 
@@ -80,7 +81,7 @@ func Tokenize(code []byte) []Token {
 
 			tokens = append(tokens, Token{string(c)})
 			token = ""
-		} else if stringArrayContains(keywords, strings.ToLower(token)) {
+		} else if stringArrayContains(keywords, strings.ToLower(token)) && !isIdentifierCharacter(c) {
 			tokens = append(tokens, Token{token})
 			token = ""
 		} else if !byteArrayContains(whitespace, c) {
@@ -183,3 +184,14 @@ func stringArrayContains(haystack []string, needle string) bool {
 
 	return false
 }
+
+// Checks if a character is allowed for identifiers.
+func isIdentifierCharacter(c byte) bool {
+	for i := range identifier {
+		if identifier[i] == c {
+			return true
+		}
+	}
+
+	return false
+}
diff --git a/src/asl/tokenizer_test.go b/src/asl/tokenizer_test.go
index 7eb6260..d2fbcfe 100644
--- a/src/asl/tokenizer_test.go
+++ b/src/asl/tokenizer_test.go
@@ -37,9 +37,9 @@ func TestTokenizerFor(t *testing.T) {
 	compareTokens(t, &got, &want)
 }
 
-func TestTokenizerEach(t *testing.T) {
-	got := getTokens(t, "test/tokenizer_each.asl")
-	want := []string{"each", "allUnits", "{", "}"}
+func TestTokenizerForeach(t *testing.T) {
+	got := getTokens(t, "test/tokenizer_foreach.asl")
+	want := []string{"foreach", "allUnits", "{", "}"}
 
 	compareLength(t, &got, &want)
 	compareTokens(t, &got, &want)
@@ -69,6 +69,14 @@ func TestTokenizerExpression(t *testing.T) {
 	compareTokens(t, &got, &want)
 }
 
+func TestTokenizerIdentifier(t *testing.T) {
+	got := getTokens(t, "test/tokenizer_identifier.asl")
+	want := []string{"var", "format", "=", "\"should not be for mat!\"", ";"}
+
+	compareLength(t, &got, &want)
+	compareTokens(t, &got, &want)
+}
+
 func compareLength(t *testing.T, got *[]Token, want *[]string) {
 	if len(*got) != len(*want) {
 		t.Error("Length of tokens got and expected tokens not equal, was:")
diff --git a/test/tokenizer_each.asl b/test/tokenizer_each.asl
deleted file mode 100644
index 344d883..0000000
--- a/test/tokenizer_each.asl
+++ /dev/null
@@ -1,3 +0,0 @@
-each allUnits {
-	// ...
-}
diff --git a/test/tokenizer_foreach.asl b/test/tokenizer_foreach.asl
new file mode 100644
index 0000000..21f1506
--- /dev/null
+++ b/test/tokenizer_foreach.asl
@@ -0,0 +1,3 @@
+foreach allUnits {
+	// ...
+}
diff --git a/test/tokenizer_identifier.asl b/test/tokenizer_identifier.asl
new file mode 100644
index 0000000..c59b3f7
--- /dev/null
+++ b/test/tokenizer_identifier.asl
@@ -0,0 +1 @@
+var format = "should not be for mat!";
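
Note for reviewers: the bug was that Tokenize emitted a token as soon as the characters read so far matched a keyword, so an identifier such as "format" was cut into the keyword "for" plus a second token "mat". The patch delays the cut until the next character can no longer be part of an identifier. Below is a minimal, self-contained Go sketch of that boundary check; the names and keyword list are simplified and string/comment handling is left out, so it illustrates the idea rather than reproducing the patched Tokenize:

package main

import (
	"fmt"
	"strings"
)

// Simplified stand-ins for the tokenizer's tables (not the full lists).
var keywords = []string{"if", "while", "for", "foreach", "var"}

const identChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"

func isIdentifierCharacter(c byte) bool {
	return strings.IndexByte(identChars, c) != -1
}

func contains(haystack []string, needle string) bool {
	for _, s := range haystack {
		if s == needle {
			return true
		}
	}
	return false
}

func tokenize(code string) []string {
	var tokens []string
	token := ""

	for i := 0; i < len(code); i++ {
		c := code[i]
		next := byte(' ') // treat end of input as a token boundary
		if i+1 < len(code) {
			next = code[i+1]
		}

		if isIdentifierCharacter(c) {
			token += string(c)
			// Emit a keyword only at an identifier boundary. Without the
			// !isIdentifierCharacter(next) check, "for" would already be
			// emitted here while scanning "format".
			if contains(keywords, strings.ToLower(token)) && !isIdentifierCharacter(next) {
				tokens = append(tokens, token)
				token = ""
			}
		} else {
			// Any non-identifier character ends the current token;
			// non-whitespace characters become tokens of their own.
			if token != "" {
				tokens = append(tokens, token)
				token = ""
			}
			if c != ' ' && c != '\n' && c != '\t' {
				tokens = append(tokens, string(c))
			}
		}
	}

	return tokens
}

func main() {
	fmt.Println(tokenize("var format = x;")) // [var format = x ;] -- "format" stays whole
	fmt.Println(tokenize("for i = 0;"))      // [for i = 0 ;]      -- real keyword still found
}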