Improve token kinds and types for parser and lexer

This commit is contained in:
Lewin Kelly 2024-02-17 23:56:39 +00:00
parent 2b00c4f884
commit ab32e69913
1 changed file with 57 additions and 56 deletions

View File

@ -4,29 +4,26 @@ local process = require "@lune/process"
local exit = process.exit local exit = process.exit
local colour = require "colour" local colour = require "colour"
local INDENT = "INDENT" type TokenKind =
local SPACE = "SPACE" "INDENT"
local NEWLINE = "NEWLINE" | "SPACE"
-- Literals | "NEWLINE"
local IDENTIFIER = "IDENTIFIER" | "IDENTIFIER"
local NUMBER = "NUMBER" | "NUMBER"
local COMMENT = "COMMENT" | "COMMENT"
local STRING = "STRING" | "STRING"
local KEYWORD = "KEYWORD" | "KEYWORD"
-- Operators | "TEXTOPERATOR"
local TEXTOPERATOR = "TEXTOPERATOR" | "EQUALS"
local EQUALS = "EQUALS" | "PLUS"
local PLUS = "PLUS" | "PLUSPLUS"
local PLUSPLUS = "PLUSPLUS" | "PLUSEQUALS"
local PLUSEQUALS = "PLUSEQUALS" | "MINUS"
local MINUS = "MINUS" | "MINUSMINUS"
local MINUSMINUS = "MINUSMINUS" | "MINUSEQUALS"
local MINUSEQUALS = "MINUSEQUALS" | "TIMES"
local TIMES = "TIMES" | "DIVIDE"
local DIVIDE = "DIVIDE" | "MODULO"
local MODULO = "MODULO"
-- OPEN_BRACE = "OPEN_BRACE"
-- CLOSE_BRACE = "CLOSE_BRACE"
local keywords = { local keywords = {
["if"] = true, ["if"] = true,
@ -66,7 +63,7 @@ local postfixOperators = {
} }
type Token = { type Token = {
kind: string, kind: TokenKind,
value: string, value: string,
line: number, line: number,
column: number, column: number,
@ -222,12 +219,12 @@ local function parse(tokens: { Token }): { Expr }
i += 1 i += 1
while i < len do while i < len do
if tokens[i].kind == NEWLINE then if tokens[i].kind == "NEWLINE" then
blockIndent = 0 blockIndent = 0
-- check next few tokens to see if they're indented -- check next few tokens to see if they're indented
local j = i + 1 local j = i + 1
while j < len and tokens[j].kind == INDENT do while j < len and tokens[j].kind == "INDENT" do
blockIndent += 1 blockIndent += 1
j += 1 j += 1
end end
@ -249,7 +246,7 @@ local function parse(tokens: { Token }): { Expr }
i += 1 i += 1
-- get all tokens until the end of the line -- get all tokens until the end of the line
while i < len and tokens[i + 1].kind ~= NEWLINE do while i < len and tokens[i + 1].kind ~= "NEWLINE" do
i += 1 i += 1
table.insert(condTokens, tokens[i]) table.insert(condTokens, tokens[i])
end end
@ -261,7 +258,7 @@ local function parse(tokens: { Token }): { Expr }
end end
local function nextNonSpace(): Token local function nextNonSpace(): Token
while i < len and tokens[i].kind == SPACE do while i < len and tokens[i].kind == "SPACE" do
i += 1 i += 1
end end
return tokens[i] return tokens[i]
@ -278,11 +275,11 @@ local function parse(tokens: { Token }): { Expr }
return cond[1] return cond[1]
end end
if token.kind == INDENT then if token.kind == "INDENT" then
currentIndent += 1 currentIndent += 1
elseif token.kind == NEWLINE then elseif token.kind == "NEWLINE" then
currentIndent = 0 currentIndent = 0
elseif token.kind == KEYWORD then elseif token.kind == "KEYWORD" then
if token.value == "if" then if token.value == "if" then
addExpr( addExpr(
IfExpr( IfExpr(
@ -308,7 +305,7 @@ local function parse(tokens: { Token }): { Expr }
print(token) print(token)
error(colour.red "unknown token value " .. token.value) error(colour.red "unknown token value " .. token.value)
end end
elseif token.kind == IDENTIFIER then elseif token.kind == "IDENTIFIER" then
-- identifier is at the start of an expression, it could be: -- identifier is at the start of an expression, it could be:
-- 1: a binop (next token is a text operator or operator -- 1: a binop (next token is a text operator or operator
-- 3: a postfix op (next token is ++ or --) -- 3: a postfix op (next token is ++ or --)
@ -330,9 +327,13 @@ local function parse(tokens: { Token }): { Expr }
)) ))
elseif postfixOperators[nextToken.value] then elseif postfixOperators[nextToken.value] then
-- postfix -- postfix
error "unimplemented"
else else
-- function call -- function call
error "unimplemented"
end end
elseif token.kind == "SPACE" or token.kind == "COMMENT" then
-- wtf
else else
print(token) print(token)
error(colour.red "unknown token kind " .. token.kind) error(colour.red "unknown token kind " .. token.kind)
@ -351,7 +352,7 @@ local function lex(source: { string }): { Token }
local line, column = 1, 0 local line, column = 1, 0
local function addToken( local function addToken(
kind: string, kind: TokenKind,
value: string, value: string,
newLine: number?, newLine: number?,
newColumn: number? newColumn: number?
@ -373,20 +374,20 @@ local function lex(source: { string }): { Token }
column += 1 column += 1
if char == "=" then if char == "=" then
addToken(EQUALS, "=") addToken("EQUALS", "=")
elseif char == "\n" then -- newline doesn't work for some reason elseif char == "\n" then -- newline doesn't work for some reason
addToken(NEWLINE, "\n") addToken("NEWLINE", "\n")
line += 1 line += 1
column = 0 column = 0
elseif char == " " then elseif char == " " then
addToken(SPACE, " ") addToken("SPACE", " ")
elseif char == "\t" then elseif char == "\t" then
-- only if last line is a newline or an indent -- only if last line is a newline or an indent
if last(1).kind == NEWLINE or last(1).kind == INDENT then if last(1).kind == "NEWLINE" or last(1).kind == "INDENT" then
addToken(INDENT, "\t") addToken("INDENT", "\t")
column += 3 column += 3
else else
addToken(SPACE, "\t") addToken("SPACE", "\t")
end end
elseif char == ";" then elseif char == ";" then
-- parse till end of line -- parse till end of line
@ -400,7 +401,7 @@ local function lex(source: { string }): { Token }
end end
column -= 1 column -= 1
i -= 1 i -= 1
addToken(COMMENT, comment, line, startColumn) addToken("COMMENT", comment, line, startColumn)
elseif char == '"' then elseif char == '"' then
local startLine, startColumn = line, column local startLine, startColumn = line, column
@ -419,39 +420,39 @@ local function lex(source: { string }): { Token }
exit(1) exit(1)
end end
addToken(STRING, stringLiteral, startLine, startColumn) addToken("STRING", stringLiteral, startLine, startColumn)
elseif char == "+" then elseif char == "+" then
-- check if it's a ++ or a += or just a + -- check if it's a ++ or a += or just a +
if i + 1 < len and source[i + 1] == "+" then if i + 1 < len and source[i + 1] == "+" then
addToken(PLUSPLUS, "++") addToken("PLUSPLUS", "++")
i += 1 i += 1
column += 1 column += 1
elseif i + 1 < len and source[i + 1] == "=" then elseif i + 1 < len and source[i + 1] == "=" then
addToken(PLUSEQUALS, "+=") addToken("PLUSEQUALS", "+=")
i += 1 i += 1
column += 1 column += 1
else else
addToken(PLUS, "+") addToken("PLUS", "+")
end end
elseif char == "-" then elseif char == "-" then
-- check if it's a -- or a -= or just a - -- check if it's a -- or a -= or just a -
if i + 1 < len and source[i + 1] == "-" then if i + 1 < len and source[i + 1] == "-" then
addToken(MINUSMINUS, "--") addToken("MINUSMINUS", "--")
i += 1 i += 1
column += 1 column += 1
elseif i + 1 < len and source[i + 1] == "=" then elseif i + 1 < len and source[i + 1] == "=" then
addToken(MINUSEQUALS, "-=") addToken("MINUSEQUALS", "-=")
i += 1 i += 1
column += 1 column += 1
else else
addToken(MINUS, "-") addToken("MINUS", "-")
end end
elseif char == "*" then elseif char == "*" then
addToken(TIMES, "*") addToken("TIMES", "*")
elseif char == "/" then elseif char == "/" then
addToken(DIVIDE, "/") addToken("DIVIDE", "/")
elseif char == "%" then elseif char == "%" then
addToken(MODULO, "%") addToken("MODULO", "%")
else else
if char >= "0" and char <= "9" then if char >= "0" and char <= "9" then
local startLine, startColumn = line, column local startLine, startColumn = line, column
@ -466,7 +467,7 @@ local function lex(source: { string }): { Token }
end end
column -= 1 column -= 1
i -= 1 i -= 1
addToken(NUMBER, number, startLine, startColumn) addToken("NUMBER", number, startLine, startColumn)
elseif elseif
char >= "a" and char <= "z" or char >= "A" and char <= "Z" char >= "a" and char <= "z" or char >= "A" and char <= "Z"
then then
@ -500,7 +501,7 @@ local function lex(source: { string }): { Token }
-- check if it's a text operator -- check if it's a text operator
if textOperators[identifierOrKeyword] then if textOperators[identifierOrKeyword] then
addToken( addToken(
TEXTOPERATOR, "TEXTOPERATOR",
identifierOrKeyword, identifierOrKeyword,
startLine, startLine,
startColumn startColumn
@ -511,7 +512,7 @@ local function lex(source: { string }): { Token }
-- check if it's a keyword -- check if it's a keyword
if keywords[identifierOrKeyword] then if keywords[identifierOrKeyword] then
addToken( addToken(
KEYWORD, "KEYWORD",
identifierOrKeyword, identifierOrKeyword,
startLine, startLine,
startColumn startColumn
@ -520,7 +521,7 @@ local function lex(source: { string }): { Token }
end end
addToken( addToken(
IDENTIFIER, "IDENTIFIER",
identifierOrKeyword, identifierOrKeyword,
startLine, startLine,
startColumn startColumn
@ -575,10 +576,10 @@ local function main()
local tokens = lex(split) local tokens = lex(split)
for _, token in tokens do for _, token in tokens do
if token.kind == SPACE then if token.kind == "SPACE" then
continue continue
end end
if token.kind == NEWLINE then if token.kind == "NEWLINE" then
print "────────────────┼───────────────┼─────────────────────────────" print "────────────────┼───────────────┼─────────────────────────────"
continue continue
end end