-- melt/Script/main.luau
--
-- 721 lines
-- 14 KiB
-- Plaintext

local fs = require "@lune/fs"
local process = require "@lune/process"
local exit = process.exit
local colour = require "colour"
-- Every kind of token that `lex` emits through `addToken`.
-- Layout kinds: INDENT (a tab at the start of a line), SPACE, NEWLINE.
-- Word kinds: IDENTIFIER, KEYWORD, TEXTOPERATOR. Literal kinds: NUMBER, STRING.
-- COMMENT holds the text after a `;`. The remaining kinds are symbolic operators.
type TokenKind =
"INDENT"
| "SPACE"
| "NEWLINE"
| "IDENTIFIER"
| "NUMBER"
| "COMMENT"
| "STRING"
| "KEYWORD"
| "TEXTOPERATOR"
| "EQUALS"
| "PLUS"
| "PLUSPLUS"
| "PLUSEQUALS"
| "MINUS"
| "MINUSMINUS"
| "MINUSEQUALS"
| "TIMES"
| "DIVIDE"
| "MODULO"
-- Words that `lex` emits as KEYWORD tokens instead of IDENTIFIER tokens.
-- Used as a set: the key is the word, the value is always true.
-- `parse` only handles "if", "elseif", "else" and "loop"; any other keyword
-- reaches its "unknown token value" error.
local keywords = {
["if"] = true,
["elseif"] = true,
["else"] = true,
["loop"] = true,
["for"] = true,
["break"] = true,
["continue"] = true,
}
-- Words that `lex` emits as TEXTOPERATOR tokens. The lexer checks this set
-- before `keywords`, so a word listed here is never a KEYWORD or IDENTIFIER.
local textOperators = {
["is"] = true,
["and"] = true,
["or"] = true,
["not"] = true,
}
-- Operator spellings that `parse` accepts between a left operand and the rest
-- of the line. The lookup key is the token's `value`.
-- Assignment forms ("=", "+=", "-=") are deliberately included: the parser
-- treats an assignment as a binary operation.
-- NOTE(review): "not" is listed here although it reads like a unary operator
-- — confirm this is intended.
local binaryOperators = {
["is"] = true,
["and"] = true,
["or"] = true,
["not"] = true,
["="] = true,
["+"] = true,
["+="] = true,
["-"] = true,
["-="] = true,
["*"] = true,
["/"] = true,
["%"] = true,
}
-- Operator spellings that may follow an identifier as a postfix operation.
-- `parse` recognises them but currently raises "unimplemented" when it sees one.
local postfixOperators = {
["++"] = true,
["--"] = true,
}
-- A single lexical token produced by `lex`.
type Token = {
-- Which category of token this is.
kind: TokenKind,
-- The token text. For STRING this is the contents without the quotes; for
-- COMMENT it is the text after the `;`.
value: string,
-- Line on which the token starts; the first line is 1.
line: number,
-- Column at which the token starts; the first character of a line is column 1
-- and an indenting tab advances the column by 4.
column: number,
}
-- Base shape shared by every expression node the parser builds.
type Expr = {
-- The token at which the expression begins.
startToken: Token,
-- Tag identifying the node type, e.g. "block", "if", "binop".
kind: string,
}
-- A sequence of expressions that belong to one indented block.
type BlockExpr = Expr & {
    expressions: { Expr },
}

-- Builds a "block" node that owns `expressions` and starts at `startToken`.
local function BlockExpr(startToken: Token, expressions: { Expr }): BlockExpr
    local node = {
        kind = "block",
        startToken = startToken,
        expressions = expressions,
    }
    return node
end
-- An `if` header together with the block it guards.
type IfExpr = Expr & {
    condition: Expr,
    block: BlockExpr,
}

-- Builds an "if" node from its header token, condition and body.
local function IfExpr(startToken: Token, condition: Expr, block: BlockExpr): IfExpr
    local node = {
        kind = "if",
        startToken = startToken,
        condition = condition,
        block = block,
    }
    return node
end
-- An `elseif` header together with the block it guards.
type ElseIfExpr = Expr & {
    condition: Expr,
    block: BlockExpr,
}

-- Builds an "elseif" node from its header token, condition and body.
local function ElseIfExpr(startToken: Token, condition: Expr, block: BlockExpr): ElseIfExpr
    local node = {
        kind = "elseif",
        startToken = startToken,
        condition = condition,
        block = block,
    }
    return node
end
-- An `else` header together with its block.
-- `block` is typed as BlockExpr (not the wider Expr) so the field matches what
-- the constructor accepts and agrees with IfExpr / ElseIfExpr.
type ElseExpr = Expr & {
    block: BlockExpr,
}

-- Builds an "else" node.
--   startToken: the `else` keyword token.
--   block:      the body that runs when no earlier branch matched.
local function ElseExpr(startToken: Token, block: BlockExpr): ElseExpr
    return {
        startToken = startToken,
        kind = "else",
        block = block,
    }
end
-- A `loop` header together with its block.
-- `block` is typed as BlockExpr (not the wider Expr) so the field matches what
-- the constructor accepts and agrees with IfExpr / ElseIfExpr.
type LoopExpr = Expr & {
    block: BlockExpr,
}

-- Builds a "loop" node.
--   startToken: the `loop` keyword token.
--   block:      the loop body.
local function LoopExpr(startToken: Token, block: BlockExpr): LoopExpr
    return {
        startToken = startToken,
        kind = "loop",
        block = block,
    }
end
-- A binary operation: `left operator right`. Assignments use this node too.
type BinOpExpr = Expr & {
    left: Expr,
    right: Expr,
    operator: Token,
}

-- Builds a "binop" node from its two operands and the operator token.
local function BinOpExpr(startToken: Token, left: Expr, right: Expr, operator: Token): BinOpExpr
    local node = {
        kind = "binop",
        startToken = startToken,
        operator = operator,
        left = left,
        right = right,
    }
    return node
end
-- A postfix operation such as `x++`: an operand followed by an operator token.
type PostfixOpExpr = Expr & {
    expr: Expr,
    operator: Token,
}

-- Builds a "postfix" node.
--   startToken: the token at which the expression begins.
--   expr:       the operand the operator applies to.
--   operator:   the postfix operator token.
-- The return type is annotated so this constructor is checked against its
-- node type like every other constructor in the file.
local function PostfixOpExpr(startToken: Token, expr: Expr, operator: Token): PostfixOpExpr
    return {
        startToken = startToken,
        kind = "postfix",
        expr = expr,
        operator = operator,
    }
end
-- A call of the form `name arg`, with exactly one argument expression.
type FunctionCallExpr = Expr & {
    name: Token,
    arg: Expr,
}

-- Builds a "functioncall" node from the callee's name token and its argument.
local function FunctionCallExpr(startToken: Token, name: Token, arg: Expr): FunctionCallExpr
    local node = {
        kind = "functioncall",
        startToken = startToken,
        name = name,
        arg = arg,
    }
    return node
end
-- A bare identifier reference; it carries no fields beyond the base Expr.
type IdentifierExpr = Expr

-- Builds an "identifier" node. Raises if `startToken` is not an IDENTIFIER token.
local function IdentifierExpr(startToken: Token): IdentifierExpr
    local tokenKind = startToken.kind
    if tokenKind == "IDENTIFIER" then
        return {
            kind = "identifier",
            startToken = startToken,
        }
    end
    error(`expected identifier, got {tokenKind}`)
end
-- A numeric literal; it carries no fields beyond the base Expr.
type NumberExpr = Expr

-- Builds a "number" node.
--   startToken: the NUMBER token holding the literal's digits.
-- Raises if `startToken` is not a NUMBER token.
local function NumberExpr(startToken: Token): NumberExpr
    if startToken.kind ~= "NUMBER" then
        error(`expected number, got {startToken.kind}`)
    end
    return {
        startToken = startToken,
        -- Tagged "number" so consumers can tell literals from identifiers.
        kind = "number",
    }
end
-- A string literal; it carries no fields beyond the base Expr.
type StringExpr = Expr

-- Builds a "string" node.
--   startToken: the STRING token holding the literal's contents.
-- Raises if `startToken` is not a STRING token.
local function StringExpr(startToken: Token): StringExpr
    if startToken.kind ~= "STRING" then
        error(`expected string, got {startToken.kind}`)
    end
    return {
        startToken = startToken,
        -- Tagged "string" so consumers can tell literals from identifiers.
        kind = "string",
    }
end
-- Turns a flat token list into a list of expression nodes.
--
-- A statement is recognised by its first significant token:
--   * KEYWORD "if" / "elseif": the rest of the line is the condition and the
--     following more-indented lines are the block.
--   * KEYWORD "else" / "loop": the following more-indented lines are the block.
--   * IDENTIFIER / NUMBER / STRING: either stands alone, is the left operand of
--     a binary operation (the rest of the line is the right operand), or, for
--     identifiers only, is a function call whose argument is the rest of the line.
-- INDENT, NEWLINE, SPACE and COMMENT tokens produce no nodes.
--
-- Raises an error for any other keyword or token kind, for postfix operators
-- (still unimplemented), and when a condition / operand / argument does not
-- parse to exactly one expression.
--
-- Known limitations kept from the original: the first line after a block
-- header is always taken as part of the block, and a blank line ends a block.
local function parse(tokens: { Token }): { Expr }
    local program: { Expr } = {}
    local len = #tokens
    local i = 0 -- index of the token currently being looked at

    local function addExpr(expr: Expr)
        table.insert(program, expr)
    end

    -- Number of INDENT tokens between the start of the line and tokens[index].
    -- Computed by scanning backwards, so it does not depend on which tokens
    -- the main loop happened to visit (helpers below consume NEWLINE tokens).
    local function indentOf(index: number): number
        local depth = 0
        local j = index - 1
        while j >= 1 and tokens[j].kind ~= "NEWLINE" do
            if tokens[j].kind == "INDENT" then
                depth += 1
            end
            j -= 1
        end
        return depth
    end

    -- Collects every token after tokens[i] up to, but not including, the next
    -- NEWLINE. Leaves `i` on that NEWLINE, or at len + 1 if the line was the last.
    -- Leading SPACE tokens are kept; `parse` ignores them.
    local function getCond(): { Token }
        local condTokens: { Token } = {}
        while i < len and tokens[i + 1].kind ~= "NEWLINE" do
            i += 1
            table.insert(condTokens, tokens[i])
        end
        i += 1
        return condTokens
    end

    -- Collects the lines that follow a block header. Expects `i` to be on the
    -- NEWLINE that ends the header line. Stops at the first NEWLINE whose next
    -- line is indented no deeper than `ownerIndent`, leaving `i` on that NEWLINE.
    local function getBlock(ownerIndent: number): { Token }
        local blockTokens: { Token } = {}
        -- step off the header's newline
        i += 1
        while i <= len do
            if tokens[i].kind == "NEWLINE" then
                -- peek at the next line's indentation
                local depth = 0
                local j = i + 1
                while j <= len and tokens[j].kind == "INDENT" do
                    depth += 1
                    j += 1
                end
                if depth <= ownerIndent then
                    break
                end
            end
            table.insert(blockTokens, tokens[i])
            i += 1
        end
        return blockTokens
    end

    -- Returns the first non-SPACE token at or after `i` and its index, or nil
    -- (with an index past the end) if only spaces remain.
    local function nextNonSpace(): (Token?, number)
        local j = i
        while j <= len and tokens[j].kind == "SPACE" do
            j += 1
        end
        return tokens[j], j
    end

    -- Parses `condTokens` and insists on exactly one resulting expression.
    local function parseCond(condTokens: { Token }): Expr
        local cond = parse(condTokens)
        if #cond > 1 then
            error(colour.red "too many exprs in cond")
        elseif #cond < 1 then
            error(colour.red "not enough exprs in cond")
        end
        return cond[1]
    end

    -- True when `candidate` spells one of `operators`. STRING and COMMENT
    -- tokens are excluded because their text is arbitrary: the literal "+"
    -- must not be mistaken for the plus operator.
    local function isOperator(candidate: Token, operators: { [string]: boolean }): boolean
        if candidate.kind == "STRING" or candidate.kind == "COMMENT" then
            return false
        end
        return operators[candidate.value] == true
    end

    -- Builds the leaf node that matches an IDENTIFIER / NUMBER / STRING token.
    local function operandFor(operandToken: Token): Expr
        if operandToken.kind == "IDENTIFIER" then
            return IdentifierExpr(operandToken)
        elseif operandToken.kind == "NUMBER" then
            return NumberExpr(operandToken)
        end
        return StringExpr(operandToken)
    end

    while i < len do
        i += 1
        local token = tokens[i]
        local kind = token.kind

        if kind == "KEYWORD" then
            local word = token.value
            if word == "if" or word == "elseif" then
                -- measure the header's indent before the helpers move `i`
                local indent = indentOf(i)
                local condition = parseCond(getCond())
                local block = BlockExpr(token, parse(getBlock(indent)))
                if word == "if" then
                    addExpr(IfExpr(token, condition, block))
                else
                    addExpr(ElseIfExpr(token, condition, block))
                end
            elseif word == "else" or word == "loop" then
                local indent = indentOf(i)
                -- step onto the newline that ends the header line
                i += 1
                local block = BlockExpr(token, parse(getBlock(indent)))
                if word == "else" then
                    addExpr(ElseExpr(token, block))
                else
                    addExpr(LoopExpr(token, block))
                end
            else
                print(token)
                error(colour.red "unknown token value " .. token.value)
            end
        elseif kind == "IDENTIFIER" or kind == "NUMBER" or kind == "STRING" then
            -- An operand at the start of an expression can be:
            --   1: the left side of a binop (an assignment counts as a binop)
            --   2: a postfix op (identifiers only, unimplemented)
            --   3: a function call (identifiers only)
            --   4: standalone
            local operandIndex = i
            i += 1
            local nextToken, advance = nextNonSpace()
            if
                nextToken == nil
                or nextToken.kind == "NEWLINE"
                or nextToken.kind == "COMMENT"
            then
                -- standalone; stop just before the next token so the main
                -- loop still gets to see it
                i = advance - 1
                addExpr(operandFor(token))
            elseif isOperator(nextToken, binaryOperators) then
                -- binop: everything after the operator is the right-hand side
                i = advance
                local left = operandFor(token)
                addExpr(BinOpExpr(token, left, parseCond(getCond()), nextToken))
            elseif kind ~= "IDENTIFIER" then
                -- a literal followed by something that is not an operator
                i = advance - 1
                addExpr(operandFor(token))
            elseif isOperator(nextToken, postfixOperators) then
                -- postfix
                error "unimplemented"
            else
                -- function call: everything after the identifier is the argument
                i = operandIndex
                addExpr(FunctionCallExpr(token, token, parseCond(getCond())))
            end
        elseif
            kind == "INDENT"
            or kind == "NEWLINE"
            or kind == "SPACE"
            or kind == "COMMENT"
        then
            -- layout and comments produce no nodes
        else
            print(token)
            error(colour.red "unknown token kind " .. token.kind)
        end
    end
    return program
end
-- Splits source text into tokens.
--   source: the program as a list of single-character strings, in order.
-- Returns the tokens in source order, each stamped with the line and column
-- at which it starts.
--
-- Prints a message and calls exit(1) on an unclosed string literal, on an
-- unsupported character, and when the input ends in the middle of a word
-- (an identifier or keyword may not be the very last thing in the program).
local function lex(source: { string }): { Token }
    local tokens: { Token } = {}
    local line, column = 1, 0

    -- Appends a token; the position defaults to the current line and column.
    local function addToken(
        kind: TokenKind,
        value: string,
        newLine: number?,
        newColumn: number?
    )
        table.insert(tokens, {
            kind = kind,
            value = value,
            line = newLine or line,
            column = newColumn or column,
        })
    end

    local function isDigit(c: string): boolean
        return c >= "0" and c <= "9"
    end

    local function isLetter(c: string): boolean
        return c >= "a" and c <= "z" or c >= "A" and c <= "Z"
    end

    -- one past the last valid index, so `i < len` means "still inside source"
    local len = #source + 1
    local i = 0
    while i < len - 1 do
        i += 1
        local char = source[i]
        column += 1
        if char == "=" then
            addToken("EQUALS", "=")
        elseif char == "\n" then
            addToken("NEWLINE", "\n")
            line += 1
            column = 0
        elseif char == " " then
            addToken("SPACE", " ")
        elseif char == "\t" then
            -- A tab indents only at the start of a line: right after a
            -- newline, after another indent, or as the first token of the
            -- file (when there is no previous token to inspect).
            local previous = tokens[#tokens]
            if
                previous == nil
                or previous.kind == "NEWLINE"
                or previous.kind == "INDENT"
            then
                addToken("INDENT", "\t")
                -- an indenting tab counts as four columns
                column += 3
            else
                addToken("SPACE", "\t")
            end
        elseif char == ";" then
            -- comment: runs to the end of the line
            local startColumn = column
            local first = i + 1 -- skip the semicolon
            i = first
            while i < len and source[i] ~= "\n" do
                column += 1
                i += 1
            end
            local comment = table.concat(source, "", first, i - 1)
            -- step back so the main loop sees the newline itself
            column -= 1
            i -= 1
            addToken("COMMENT", comment, line, startColumn)
        elseif char == '"' then
            local startLine, startColumn = line, column
            column += 1
            i += 1 -- skip the opening quote
            local first = i
            while i < len and source[i] ~= '"' do
                if source[i] == "\n" then
                    -- keep positions right for tokens after a multi-line string
                    line += 1
                    column = 1
                else
                    column += 1
                end
                i += 1
            end
            local stringLiteral = table.concat(source, "", first, i - 1)
            if i == len then
                print(colour.red "unclosed string literal", stringLiteral)
                exit(1)
            end
            addToken("STRING", stringLiteral, startLine, startColumn)
        elseif char == "+" then
            -- ++, += or a plain +
            if i + 1 < len and source[i + 1] == "+" then
                addToken("PLUSPLUS", "++")
                i += 1
                column += 1
            elseif i + 1 < len and source[i + 1] == "=" then
                addToken("PLUSEQUALS", "+=")
                i += 1
                column += 1
            else
                addToken("PLUS", "+")
            end
        elseif char == "-" then
            -- --, -= or a plain -
            if i + 1 < len and source[i + 1] == "-" then
                addToken("MINUSMINUS", "--")
                i += 1
                column += 1
            elseif i + 1 < len and source[i + 1] == "=" then
                addToken("MINUSEQUALS", "-=")
                i += 1
                column += 1
            else
                addToken("MINUS", "-")
            end
        elseif char == "*" then
            addToken("TIMES", "*")
        elseif char == "/" then
            addToken("DIVIDE", "/")
        elseif char == "%" then
            addToken("MODULO", "%")
        elseif isDigit(char) then
            local startLine, startColumn = line, column
            local first = i
            -- keep going until we hit a non-digit
            while i < len and isDigit(source[i]) do
                column += 1
                i += 1
            end
            local number = table.concat(source, "", first, i - 1)
            -- step back onto the last digit; the main loop advances again
            column -= 1
            i -= 1
            addToken("NUMBER", number, startLine, startColumn)
        elseif isLetter(char) then
            local startLine, startColumn = line, column
            local first = i
            -- a word is a letter followed by letters and digits
            while i < len and (isLetter(source[i]) or isDigit(source[i])) do
                column += 1
                i += 1
            end
            if i == len then
                -- you can't end a program with an identifier
                print(colour.red "cant end program with identifier")
                exit(1)
            end
            local word = table.concat(source, "", first, i - 1)
            column -= 1
            i -= 1
            -- text operators take priority over keywords, keywords over identifiers
            local wordKind: TokenKind = "IDENTIFIER"
            if textOperators[word] then
                wordKind = "TEXTOPERATOR"
            elseif keywords[word] then
                wordKind = "KEYWORD"
            end
            addToken(wordKind, word, startLine, startColumn)
        else
            print(
                colour.red "that isnt a valid character",
                colour.yellow(char)
            )
            exit(1)
        end
    end
    return tokens
end
-- Command-line entry point: lexes and parses the file named by the first
-- argument, printing a table of the tokens and then the parsed program.
-- Prints a message and exits with status 1 when no target is given, when the
-- target does not exist, or when it is a directory.
local function main()
    local args = process.args
    if #args < 1 then
        print(colour.red "No target file specified!")
        print(colour.blue "Run 'melt-script help' for more information.")
        exit(1)
    end

    local target = args[1]
    local info = fs.metadata(target)
    if not info.exists then
        print(
            colour.red "Target file",
            colour.bold(target),
            colour.red "does not exist!"
        )
        exit(1)
    end
    if info.kind == "dir" then
        print(
            colour.bold(target),
            colour.red "is a directory, please choose a file to compile!"
        )
        exit(1)
    end

    -- normalise line endings, then drop trailing newlines
    local raw = fs.readFile(target)
    local unixText = (string.gsub(raw, "\r\n", "\n"))
    local trimmed = (string.gsub(unixText, "\n+$", ""))

    -- the lexer works on a list of single characters
    local chars = string.split(trimmed, "")
    local tokens = lex(chars)

    -- right-pads `str` with spaces up to `width` characters
    local function pad(str: string, width: number): string
        return str .. string.rep(" ", width - #str)
    end

    -- dump the tokens as a table: spaces are hidden, newlines become rules
    for _, token in tokens do
        local kind = token.kind
        if kind == "NEWLINE" then
            print "────────────────┼───────────────┼─────────────────────────────"
        elseif kind ~= "SPACE" then
            print(
                pad(`{target}:{token.line}:{token.column}`, 15),
                "│",
                pad(colour.yellow(kind), 22),
                "│",
                colour.purple(token.value)
            )
        end
    end

    local program = parse(tokens)
    -- code generation is not wired up yet:
    -- local out = generate(program)
    print(program)
    -- print(out)
end

main()