Improve token kinds and types for parser and lexer
This commit is contained in:
parent
2b00c4f884
commit
ab32e69913
113
Script/main.luau
113
Script/main.luau
|
|
@ -4,29 +4,26 @@ local process = require "@lune/process"
|
||||||
local exit = process.exit
|
local exit = process.exit
|
||||||
local colour = require "colour"
|
local colour = require "colour"
|
||||||
|
|
||||||
local INDENT = "INDENT"
|
type TokenKind =
|
||||||
local SPACE = "SPACE"
|
"INDENT"
|
||||||
local NEWLINE = "NEWLINE"
|
| "SPACE"
|
||||||
-- Literals
|
| "NEWLINE"
|
||||||
local IDENTIFIER = "IDENTIFIER"
|
| "IDENTIFIER"
|
||||||
local NUMBER = "NUMBER"
|
| "NUMBER"
|
||||||
local COMMENT = "COMMENT"
|
| "COMMENT"
|
||||||
local STRING = "STRING"
|
| "STRING"
|
||||||
local KEYWORD = "KEYWORD"
|
| "KEYWORD"
|
||||||
-- Operators
|
| "TEXTOPERATOR"
|
||||||
local TEXTOPERATOR = "TEXTOPERATOR"
|
| "EQUALS"
|
||||||
local EQUALS = "EQUALS"
|
| "PLUS"
|
||||||
local PLUS = "PLUS"
|
| "PLUSPLUS"
|
||||||
local PLUSPLUS = "PLUSPLUS"
|
| "PLUSEQUALS"
|
||||||
local PLUSEQUALS = "PLUSEQUALS"
|
| "MINUS"
|
||||||
local MINUS = "MINUS"
|
| "MINUSMINUS"
|
||||||
local MINUSMINUS = "MINUSMINUS"
|
| "MINUSEQUALS"
|
||||||
local MINUSEQUALS = "MINUSEQUALS"
|
| "TIMES"
|
||||||
local TIMES = "TIMES"
|
| "DIVIDE"
|
||||||
local DIVIDE = "DIVIDE"
|
| "MODULO"
|
||||||
local MODULO = "MODULO"
|
|
||||||
-- OPEN_BRACE = "OPEN_BRACE"
|
|
||||||
-- CLOSE_BRACE = "CLOSE_BRACE"
|
|
||||||
|
|
||||||
local keywords = {
|
local keywords = {
|
||||||
["if"] = true,
|
["if"] = true,
|
||||||
|
|
@ -66,7 +63,7 @@ local postfixOperators = {
|
||||||
}
|
}
|
||||||
|
|
||||||
type Token = {
|
type Token = {
|
||||||
kind: string,
|
kind: TokenKind,
|
||||||
value: string,
|
value: string,
|
||||||
line: number,
|
line: number,
|
||||||
column: number,
|
column: number,
|
||||||
|
|
@ -222,12 +219,12 @@ local function parse(tokens: { Token }): { Expr }
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
while i < len do
|
while i < len do
|
||||||
if tokens[i].kind == NEWLINE then
|
if tokens[i].kind == "NEWLINE" then
|
||||||
blockIndent = 0
|
blockIndent = 0
|
||||||
-- check next few tokens to see if they're indented
|
-- check next few tokens to see if they're indented
|
||||||
|
|
||||||
local j = i + 1
|
local j = i + 1
|
||||||
while j < len and tokens[j].kind == INDENT do
|
while j < len and tokens[j].kind == "INDENT" do
|
||||||
blockIndent += 1
|
blockIndent += 1
|
||||||
j += 1
|
j += 1
|
||||||
end
|
end
|
||||||
|
|
@ -249,7 +246,7 @@ local function parse(tokens: { Token }): { Expr }
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
-- get all tokens until the end of the line
|
-- get all tokens until the end of the line
|
||||||
while i < len and tokens[i + 1].kind ~= NEWLINE do
|
while i < len and tokens[i + 1].kind ~= "NEWLINE" do
|
||||||
i += 1
|
i += 1
|
||||||
table.insert(condTokens, tokens[i])
|
table.insert(condTokens, tokens[i])
|
||||||
end
|
end
|
||||||
|
|
@ -261,7 +258,7 @@ local function parse(tokens: { Token }): { Expr }
|
||||||
end
|
end
|
||||||
|
|
||||||
local function nextNonSpace(): Token
|
local function nextNonSpace(): Token
|
||||||
while i < len and tokens[i].kind == SPACE do
|
while i < len and tokens[i].kind == "SPACE" do
|
||||||
i += 1
|
i += 1
|
||||||
end
|
end
|
||||||
return tokens[i]
|
return tokens[i]
|
||||||
|
|
@ -278,11 +275,11 @@ local function parse(tokens: { Token }): { Expr }
|
||||||
return cond[1]
|
return cond[1]
|
||||||
end
|
end
|
||||||
|
|
||||||
if token.kind == INDENT then
|
if token.kind == "INDENT" then
|
||||||
currentIndent += 1
|
currentIndent += 1
|
||||||
elseif token.kind == NEWLINE then
|
elseif token.kind == "NEWLINE" then
|
||||||
currentIndent = 0
|
currentIndent = 0
|
||||||
elseif token.kind == KEYWORD then
|
elseif token.kind == "KEYWORD" then
|
||||||
if token.value == "if" then
|
if token.value == "if" then
|
||||||
addExpr(
|
addExpr(
|
||||||
IfExpr(
|
IfExpr(
|
||||||
|
|
@ -308,7 +305,7 @@ local function parse(tokens: { Token }): { Expr }
|
||||||
print(token)
|
print(token)
|
||||||
error(colour.red "unknown token value " .. token.value)
|
error(colour.red "unknown token value " .. token.value)
|
||||||
end
|
end
|
||||||
elseif token.kind == IDENTIFIER then
|
elseif token.kind == "IDENTIFIER" then
|
||||||
-- identifier is at the start of an expression, it could be:
|
-- identifier is at the start of an expression, it could be:
|
||||||
-- 1: a binop (next token is a text operator or operator)
|
-- 1: a binop (next token is a text operator or operator)
|
||||||
-- 3: a postfix op (next token is ++ or --)
|
-- 3: a postfix op (next token is ++ or --)
|
||||||
|
|
@ -330,9 +327,13 @@ local function parse(tokens: { Token }): { Expr }
|
||||||
))
|
))
|
||||||
elseif postfixOperators[nextToken.value] then
|
elseif postfixOperators[nextToken.value] then
|
||||||
-- postfix
|
-- postfix
|
||||||
|
error "unimplemented"
|
||||||
else
|
else
|
||||||
-- function call
|
-- function call
|
||||||
|
error "unimplemented"
|
||||||
end
|
end
|
||||||
|
elseif token.kind == "SPACE" or token.kind == "COMMENT" then
|
||||||
|
-- SPACE and COMMENT tokens are intentionally ignored here (nothing to parse)
|
||||||
else
|
else
|
||||||
print(token)
|
print(token)
|
||||||
error(colour.red "unknown token kind " .. token.kind)
|
error(colour.red "unknown token kind " .. token.kind)
|
||||||
|
|
@ -351,7 +352,7 @@ local function lex(source: { string }): { Token }
|
||||||
local line, column = 1, 0
|
local line, column = 1, 0
|
||||||
|
|
||||||
local function addToken(
|
local function addToken(
|
||||||
kind: string,
|
kind: TokenKind,
|
||||||
value: string,
|
value: string,
|
||||||
newLine: number?,
|
newLine: number?,
|
||||||
newColumn: number?
|
newColumn: number?
|
||||||
|
|
@ -373,20 +374,20 @@ local function lex(source: { string }): { Token }
|
||||||
column += 1
|
column += 1
|
||||||
|
|
||||||
if char == "=" then
|
if char == "=" then
|
||||||
addToken(EQUALS, "=")
|
addToken("EQUALS", "=")
|
||||||
elseif char == "\n" then -- newline dont work for some reason
|
elseif char == "\n" then -- newline dont work for some reason
|
||||||
addToken(NEWLINE, "\n")
|
addToken("NEWLINE", "\n")
|
||||||
line += 1
|
line += 1
|
||||||
column = 0
|
column = 0
|
||||||
elseif char == " " then
|
elseif char == " " then
|
||||||
addToken(SPACE, " ")
|
addToken("SPACE", " ")
|
||||||
elseif char == "\t" then
|
elseif char == "\t" then
|
||||||
-- only if last line is a newline or an indent
|
-- only if last line is a newline or an indent
|
||||||
if last(1).kind == NEWLINE or last(1).kind == INDENT then
|
if last(1).kind == "NEWLINE" or last(1).kind == "INDENT" then
|
||||||
addToken(INDENT, "\t")
|
addToken("INDENT", "\t")
|
||||||
column += 3
|
column += 3
|
||||||
else
|
else
|
||||||
addToken(SPACE, "\t")
|
addToken("SPACE", "\t")
|
||||||
end
|
end
|
||||||
elseif char == ";" then
|
elseif char == ";" then
|
||||||
-- parse till end of line
|
-- parse till end of line
|
||||||
|
|
@ -400,7 +401,7 @@ local function lex(source: { string }): { Token }
|
||||||
end
|
end
|
||||||
column -= 1
|
column -= 1
|
||||||
i -= 1
|
i -= 1
|
||||||
addToken(COMMENT, comment, line, startColumn)
|
addToken("COMMENT", comment, line, startColumn)
|
||||||
elseif char == '"' then
|
elseif char == '"' then
|
||||||
local startLine, startColumn = line, column
|
local startLine, startColumn = line, column
|
||||||
|
|
||||||
|
|
@ -419,39 +420,39 @@ local function lex(source: { string }): { Token }
|
||||||
exit(1)
|
exit(1)
|
||||||
end
|
end
|
||||||
|
|
||||||
addToken(STRING, stringLiteral, startLine, startColumn)
|
addToken("STRING", stringLiteral, startLine, startColumn)
|
||||||
elseif char == "+" then
|
elseif char == "+" then
|
||||||
-- check if it's a ++ or a += or just a +
|
-- check if it's a ++ or a += or just a +
|
||||||
if i + 1 < len and source[i + 1] == "+" then
|
if i + 1 < len and source[i + 1] == "+" then
|
||||||
addToken(PLUSPLUS, "++")
|
addToken("PLUSPLUS", "++")
|
||||||
i += 1
|
i += 1
|
||||||
column += 1
|
column += 1
|
||||||
elseif i + 1 < len and source[i + 1] == "=" then
|
elseif i + 1 < len and source[i + 1] == "=" then
|
||||||
addToken(PLUSEQUALS, "+=")
|
addToken("PLUSEQUALS", "+=")
|
||||||
i += 1
|
i += 1
|
||||||
column += 1
|
column += 1
|
||||||
else
|
else
|
||||||
addToken(PLUS, "+")
|
addToken("PLUS", "+")
|
||||||
end
|
end
|
||||||
elseif char == "-" then
|
elseif char == "-" then
|
||||||
-- check if it's a -- or a -= or just a -
|
-- check if it's a -- or a -= or just a -
|
||||||
if i + 1 < len and source[i + 1] == "-" then
|
if i + 1 < len and source[i + 1] == "-" then
|
||||||
addToken(MINUSMINUS, "--")
|
addToken("MINUSMINUS", "--")
|
||||||
i += 1
|
i += 1
|
||||||
column += 1
|
column += 1
|
||||||
elseif i + 1 < len and source[i + 1] == "=" then
|
elseif i + 1 < len and source[i + 1] == "=" then
|
||||||
addToken(MINUSEQUALS, "-=")
|
addToken("MINUSEQUALS", "-=")
|
||||||
i += 1
|
i += 1
|
||||||
column += 1
|
column += 1
|
||||||
else
|
else
|
||||||
addToken(MINUS, "-")
|
addToken("MINUS", "-")
|
||||||
end
|
end
|
||||||
elseif char == "*" then
|
elseif char == "*" then
|
||||||
addToken(TIMES, "*")
|
addToken("TIMES", "*")
|
||||||
elseif char == "/" then
|
elseif char == "/" then
|
||||||
addToken(DIVIDE, "/")
|
addToken("DIVIDE", "/")
|
||||||
elseif char == "%" then
|
elseif char == "%" then
|
||||||
addToken(MODULO, "%")
|
addToken("MODULO", "%")
|
||||||
else
|
else
|
||||||
if char >= "0" and char <= "9" then
|
if char >= "0" and char <= "9" then
|
||||||
local startLine, startColumn = line, column
|
local startLine, startColumn = line, column
|
||||||
|
|
@ -466,7 +467,7 @@ local function lex(source: { string }): { Token }
|
||||||
end
|
end
|
||||||
column -= 1
|
column -= 1
|
||||||
i -= 1
|
i -= 1
|
||||||
addToken(NUMBER, number, startLine, startColumn)
|
addToken("NUMBER", number, startLine, startColumn)
|
||||||
elseif
|
elseif
|
||||||
char >= "a" and char <= "z" or char >= "A" and char <= "Z"
|
char >= "a" and char <= "z" or char >= "A" and char <= "Z"
|
||||||
then
|
then
|
||||||
|
|
@ -500,7 +501,7 @@ local function lex(source: { string }): { Token }
|
||||||
-- check if it's a text operator
|
-- check if it's a text operator
|
||||||
if textOperators[identifierOrKeyword] then
|
if textOperators[identifierOrKeyword] then
|
||||||
addToken(
|
addToken(
|
||||||
TEXTOPERATOR,
|
"TEXTOPERATOR",
|
||||||
identifierOrKeyword,
|
identifierOrKeyword,
|
||||||
startLine,
|
startLine,
|
||||||
startColumn
|
startColumn
|
||||||
|
|
@ -511,7 +512,7 @@ local function lex(source: { string }): { Token }
|
||||||
-- check if it's a keyword
|
-- check if it's a keyword
|
||||||
if keywords[identifierOrKeyword] then
|
if keywords[identifierOrKeyword] then
|
||||||
addToken(
|
addToken(
|
||||||
KEYWORD,
|
"KEYWORD",
|
||||||
identifierOrKeyword,
|
identifierOrKeyword,
|
||||||
startLine,
|
startLine,
|
||||||
startColumn
|
startColumn
|
||||||
|
|
@ -520,7 +521,7 @@ local function lex(source: { string }): { Token }
|
||||||
end
|
end
|
||||||
|
|
||||||
addToken(
|
addToken(
|
||||||
IDENTIFIER,
|
"IDENTIFIER",
|
||||||
identifierOrKeyword,
|
identifierOrKeyword,
|
||||||
startLine,
|
startLine,
|
||||||
startColumn
|
startColumn
|
||||||
|
|
@ -575,10 +576,10 @@ local function main()
|
||||||
local tokens = lex(split)
|
local tokens = lex(split)
|
||||||
|
|
||||||
for _, token in tokens do
|
for _, token in tokens do
|
||||||
if token.kind == SPACE then
|
if token.kind == "SPACE" then
|
||||||
continue
|
continue
|
||||||
end
|
end
|
||||||
if token.kind == NEWLINE then
|
if token.kind == "NEWLINE" then
|
||||||
print "────────────────┼───────────────┼─────────────────────────────"
|
print "────────────────┼───────────────┼─────────────────────────────"
|
||||||
continue
|
continue
|
||||||
end
|
end
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue