package tokenizer

import (
	"strings"
)

// Token is a single lexical unit produced by Tokenize.
type Token struct {
	// Token holds the raw text of the unit as it was collected by Tokenize:
	// either a single delimiter character or a run of other characters
	// (keywords, identifiers, quoted strings including their quotes).
	Token string
}

// delimiter holds every byte that terminates the pending token and is then
// emitted as a one-character token of its own.
// TODO: modulo?
var delimiter = []byte("=;{}()[]<>!,:&|+-*/")

// keywords lists the words that Tokenize emits as a token of their own as
// soon as they are followed by a non-identifier character.
// The pending token is lower-cased before it is compared against this list,
// so matching is case-insensitive and every entry must be lower case.
var keywords = []string{
	"var",
	"if",
	"while",
	"switch",
	"for",
	"foreach",
	"func",
	"true",
	"false",
	"case",
	"default",
	"return",
	"try",
	"catch",
	"exitwith",
	"waituntil",
	"code"}

var (
	// whitespace holds the bytes that are discarded outside of strings:
	// space, line feed, tab and carriage return.
	whitespace = []byte(" \n\t\r")

	// identifier holds every byte that may appear in an identifier:
	// ASCII letters, digits and the underscore.
	identifier = "abcdefghijklmnopqrstuvwxyz" +
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ" +
		"0123456789" +
		"_"
)

// Tokenize strips all comments from code and splits the remainder into
// syntax tokens, which can be parsed later.
//
// The input is processed byte by byte using these rules:
//   - A backslash escapes the following byte, so an escaped double quote
//     neither opens nor closes a string. The backslash stays in the token.
//   - Everything between two unescaped double quotes is copied verbatim
//     into the pending token, quotes included.
//   - Outside of strings, a byte listed in delimiter ends the pending token
//     and is emitted as a token of its own.
//   - Outside of strings, a pending token that case-insensitively equals an
//     entry of keywords is emitted once a non-identifier byte follows it.
//   - Outside of strings, whitespace is discarded. Whitespace alone does not
//     separate tokens: "a b" yields the single token "ab".
//
// A token that is still pending when the input ends is emitted as well.
// The returned slice is never nil.
func Tokenize(code []byte) []Token {
	code = removeComments(code)
	tokens := make([]Token, 0)

	// The pending token is collected as raw bytes. Converting a single byte
	// with string(c) is a rune conversion that re-encodes values >= 0x80 as
	// two bytes, which would corrupt UTF-8 text inside string literals.
	var pending []byte
	mask, inString := false, false

	// flush emits the pending token, if there is one.
	flush := func() {
		if len(pending) > 0 {
			tokens = append(tokens, Token{Token: string(pending)})
			pending = pending[:0]
		}
	}

	for _, c := range code {
		// string masks (backslash)
		if c == '\\' && !mask {
			pending = append(pending, c)
			mask = true
			continue
		}

		// string start/end
		if c == '"' && !mask {
			pending = append(pending, c)
			inString = !inString
			continue
		}

		switch {
		case inString:
			pending = append(pending, c)
		case byteArrayContains(delimiter, c):
			flush()
			tokens = append(tokens, Token{Token: string([]byte{c})})
		default:
			// A keyword ends at the first non-identifier byte.
			if !isIdentifierCharacter(c) && stringArrayContains(strings.ToLower(string(pending))) {
				flush()
			}

			// The byte that ended a keyword is only thrown away if it is
			// whitespace; anything else starts the next token.
			if !byteArrayContains(whitespace, c) {
				pending = append(pending, c)
			}
		}

		mask = false
	}

	// Input that does not end with a delimiter still carries a last token.
	flush()

	return tokens
}

// removeComments returns a copy of code with all comments removed.
// Comments are single line comments, starting with // (two slashes), and
// multi line comments enclosed in /* ... */ (slash star, star slash).
//
// Comment markers inside double-quoted strings are left untouched. A
// backslash escapes the following byte, so \" does not end a string and \\
// does not escape the quote behind it.
//
// The newline that ends a single line comment is removed together with the
// comment. An unterminated multi line comment removes the rest of the input.
func removeComments(code []byte) []byte {
	newcode := make([]byte, 0, len(code))
	mask, isstring := false, false

	for i := 0; i < len(code); i++ {
		c := code[i]

		// single/multi line comment, only recognised outside of strings
		if !isstring && c == '/' {
			switch nextChar(code, i) {
			case '/':
				i = skipSingleLineComment(code, i+1)
				mask = false
				continue
			case '*':
				// Search for "*/" behind the opening "/*", otherwise the '*'
				// of the opener doubles as the '*' of the terminator and
				// "/*/" would count as a complete comment.
				i = skipMultiLineComment(code, i+2)
				mask = false
				continue
			}
		}

		// Track string state. The mask must survive until the next byte has
		// been looked at, so it is cleared by that byte and not at the end
		// of the iteration that set it.
		switch {
		case mask:
			mask = false
		case c == '\\':
			mask = true
		case c == '"':
			isstring = !isstring
		}

		newcode = append(newcode, c)
	}

	return newcode
}

// nextChar returns the byte that follows position i in code.
// If there is no such byte, the ASCII digit '0' is returned as a sentinel.
func nextChar(code []byte, i int) byte {
	if next := i + 1; next < len(code) {
		return code[next]
	}

	return '0'
}

// skipSingleLineComment advances from position i to the end of the current
// line. It returns the index of the next '\n' at or after i, or the first
// index past the input if the line is not terminated.
func skipSingleLineComment(code []byte, i int) int {
	for ; i < len(code); i++ {
		if code[i] == '\n' {
			break
		}
	}

	return i
}

// skipMultiLineComment scans forward from position i for the sequence "*/"
// that ends a multi line comment. It returns the index of the closing '/',
// or one past the position where scanning stopped if no "*/" was found.
func skipMultiLineComment(code []byte, i int) int {
	for ; i < len(code); i++ {
		if code[i] == '*' && nextChar(code, i) == '/' {
			break
		}
	}

	return i + 1
}

// byteArrayContains reports whether needle occurs in haystack.
func byteArrayContains(haystack []byte, needle byte) bool {
	for _, b := range haystack {
		if b == needle {
			return true
		}
	}

	return false
}

// stringArrayContains reports whether needle is one of the entries of
// keywords. The comparison is exact, so callers have to lower-case needle
// themselves for a case-insensitive match.
func stringArrayContains(needle string) bool {
	for _, keyword := range keywords {
		if keyword == needle {
			return true
		}
	}

	return false
}

// isIdentifierCharacter reports whether c is one of the bytes listed in
// identifier, i.e. whether it is allowed inside an identifier.
func isIdentifierCharacter(c byte) bool {
	return strings.IndexByte(identifier, c) >= 0
}