721 lines
14 KiB
Plaintext
721 lines
14 KiB
Plaintext
local fs = require "@lune/fs"
|
|
local process = require "@lune/process"
|
|
|
|
local exit = process.exit
|
|
local colour = require "colour"
|
|
|
|
-- Every category of token the lexer can produce.
type TokenKind =
    -- layout
    "INDENT"
    | "SPACE"
    | "NEWLINE"
    -- words, literals and comments
    | "IDENTIFIER"
    | "NUMBER"
    | "COMMENT"
    | "STRING"
    | "KEYWORD"
    | "TEXTOPERATOR"
    -- assignment
    | "EQUALS"
    -- the "+" family
    | "PLUS"
    | "PLUSPLUS"
    | "PLUSEQUALS"
    -- the "-" family
    | "MINUS"
    | "MINUSMINUS"
    | "MINUSEQUALS"
    -- remaining arithmetic symbols
    | "TIMES"
    | "DIVIDE"
    | "MODULO"
|
|
|
|
-- Set of reserved words; the lexer tags a matching word as a KEYWORD token.
local keywords = {}
for _, word in ipairs({ "if", "elseif", "else", "loop", "for", "break", "continue" }) do
    keywords[word] = true
end
|
|
|
|
-- Set of operators that are spelled as words; the lexer tags a matching word
-- as a TEXTOPERATOR token.
local textOperators = {}
for _, word in ipairs({ "is", "and", "or", "not" }) do
    textOperators[word] = true
end
|
|
|
|
-- Set of token values the parser treats as the operator of a binary
-- expression (word operators, assignment forms and arithmetic symbols).
local binaryOperators = {}
for _, symbol in ipairs({
    "is", "and", "or", "not",
    "=",
    "+", "+=",
    "-", "-=",
    "*", "/", "%",
}) do
    binaryOperators[symbol] = true
end
|
|
|
|
-- Set of token values the parser treats as postfix operators.
local postfixOperators = {}
for _, symbol in ipairs({ "++", "--" }) do
    postfixOperators[symbol] = true
end
|
|
|
|
-- One lexed token together with the source position it was recorded at.
type Token = {
    kind: TokenKind, -- category of the token
    value: string, -- text stored for the token by the lexer
    line: number, -- line recorded by the lexer
    column: number, -- column recorded by the lexer
}
|
|
|
|
-- Fields shared by every expression node; the specific node types below are
-- intersections of this type with their own fields.
type Expr = {
    startToken: Token, -- token the node was created from
    kind: string, -- tag naming the node variant, e.g. "block" or "if"
}
|
|
|
|
-- A sequence of expressions forming the body of another node.
type BlockExpr = Expr & { expressions: { Expr } }

-- Creates a "block" node holding `expressions`, anchored at `startToken`.
local function BlockExpr(startToken: Token, expressions: { Expr }): BlockExpr
    local node = {
        kind = "block",
        startToken = startToken,
        expressions = expressions,
    }
    return node
end
|
|
|
|
-- An `if` statement: a condition plus the block it guards.
type IfExpr = Expr & { condition: Expr, block: BlockExpr }

-- Creates an "if" node from its keyword token, condition and body block.
local function IfExpr(startToken: Token, condition: Expr, block: BlockExpr): IfExpr
    local node = {
        kind = "if",
        startToken = startToken,
        condition = condition,
        block = block,
    }
    return node
end
|
|
|
|
-- An `elseif` clause: a condition plus the block it guards.
type ElseIfExpr = Expr & { condition: Expr, block: BlockExpr }

-- Creates an "elseif" node from its keyword token, condition and body block.
local function ElseIfExpr(startToken: Token, condition: Expr, block: BlockExpr): ElseIfExpr
    local node = {
        kind = "elseif",
        startToken = startToken,
        condition = condition,
        block = block,
    }
    return node
end
|
|
|
|
-- An `else` clause and its body.
type ElseExpr = Expr & { block: Expr }

-- Creates an "else" node from its keyword token and body block.
local function ElseExpr(startToken: Token, block: BlockExpr): ElseExpr
    local node = {
        kind = "else",
        startToken = startToken,
        block = block,
    }
    return node
end
|
|
|
|
-- A `loop` statement and its body.
type LoopExpr = Expr & { block: Expr }

-- Creates a "loop" node from its keyword token and body block.
local function LoopExpr(startToken: Token, block: BlockExpr): LoopExpr
    local node = {
        kind = "loop",
        startToken = startToken,
        block = block,
    }
    return node
end
|
|
|
|
-- A binary operation: two operands joined by an operator token.
type BinOpExpr = Expr & { left: Expr, right: Expr, operator: Token }

-- Creates a "binop" node from its operands and the operator token between them.
local function BinOpExpr(startToken: Token, left: Expr, right: Expr, operator: Token): BinOpExpr
    local node = {
        kind = "binop",
        startToken = startToken,
        left = left,
        right = right,
        operator = operator,
    }
    return node
end
|
|
|
|
-- A postfix operation: an operand followed by an operator token.
type PostfixOpExpr = Expr & { expr: Expr, operator: Token }

-- Creates a "postfix" node from its operand and the operator token after it.
local function PostfixOpExpr(startToken: Token, expr: Expr, operator: Token)
    local node = {
        kind = "postfix",
        startToken = startToken,
        expr = expr,
        operator = operator,
    }
    return node
end
|
|
|
|
-- A function call: the token naming the callee plus one argument expression.
type FunctionCallExpr = Expr & { name: Token, arg: Expr }

-- Creates a "functioncall" node from the callee's name token and its argument.
local function FunctionCallExpr(startToken: Token, name: Token, arg: Expr): FunctionCallExpr
    local node = {
        kind = "functioncall",
        startToken = startToken,
        name = name,
        arg = arg,
    }
    return node
end
|
|
|
|
-- A bare identifier; it carries no fields beyond the common ones.
type IdentifierExpr = Expr

-- Creates an "identifier" node. Raises an error when `startToken` is not an
-- IDENTIFIER token.
local function IdentifierExpr(startToken: Token): IdentifierExpr
    local tokenKind = startToken.kind
    if tokenKind == "IDENTIFIER" then
        return {
            kind = "identifier",
            startToken = startToken,
        }
    end

    error(`expected identifier, got {tokenKind}`)
end
|
|
|
|
-- A numeric literal; it carries no fields beyond the common ones.
type NumberExpr = Expr

-- Creates a "number" node. Raises an error when `startToken` is not a NUMBER
-- token.
local function NumberExpr(startToken: Token): NumberExpr
    if startToken.kind ~= "NUMBER" then
        error(`expected number, got {startToken.kind}`)
    end

    return {
        startToken = startToken,
        -- Tagged "number" so consumers can tell literals apart from
        -- identifier nodes (this was previously mis-tagged "identifier").
        kind = "number",
    }
end
|
|
|
|
-- A string literal; it carries no fields beyond the common ones.
type StringExpr = Expr

-- Creates a "string" node. Raises an error when `startToken` is not a STRING
-- token.
local function StringExpr(startToken: Token): StringExpr
    if startToken.kind ~= "STRING" then
        -- The message names the kind that was actually expected (it used to
        -- say "number").
        error(`expected string, got {startToken.kind}`)
    end

    return {
        startToken = startToken,
        -- Tagged "string" so consumers can tell literals apart from
        -- identifier nodes (this was previously mis-tagged "identifier").
        kind = "string",
    }
end
|
|
|
|
-- yea
|
|
|
|
-- Turns a token list into a list of expression nodes.
--
-- The parser walks the tokens once. Layout tokens (INDENT, NEWLINE, SPACE) and
-- COMMENT tokens produce no node. A KEYWORD starts an if/elseif/else/loop
-- statement whose body is the following, more deeply indented lines; the body
-- (and any condition) is parsed by calling `parse` recursively on the token
-- slice. An IDENTIFIER, NUMBER or STRING starts an expression that is either
-- standalone or the left-hand side of a binary operation whose right-hand side
-- is the rest of the line.
--
-- @param tokens tokens as produced by the lexer
-- @return the parsed expression nodes, in source order
-- Raises an error on keywords that are not handled yet, on token kinds that
-- cannot start an expression, on postfix operators ("unimplemented"), and when
-- a condition / right-hand side does not parse to exactly one expression.
local function parse(tokens: { Token }): { Expr }
    local program: { Expr } = {}
    local len = #tokens

    -- Index of the token being examined; the helpers below advance it.
    local i = 0
    -- Indentation depth of the line holding the statement being parsed.
    -- getBlock compares following lines against it.
    local currentIndent = 0

    local function addExpr(expr: Expr)
        table.insert(program, expr)
    end

    -- Counts the INDENT tokens between `index` and the previous NEWLINE (or
    -- the start of the token list). Deriving the depth from the tokens, rather
    -- than tracking it while walking, keeps it correct even though the helpers
    -- below consume NEWLINE tokens themselves.
    local function indentOf(index: number): number
        local depth = 0
        local back = index - 1
        while back >= 1 and tokens[back].kind ~= "NEWLINE" do
            if tokens[back].kind == "INDENT" then
                depth += 1
            end
            back -= 1
        end
        return depth
    end

    -- Collects the tokens of the block that follows the current statement
    -- header. Expects `i` to sit on the NEWLINE that ends the header. The
    -- block ends at the first NEWLINE whose following line is indented no
    -- deeper than `currentIndent`; `i` is left on that NEWLINE (or past the
    -- end of the input).
    local function getBlock(): { Token }
        local blockTokens: { Token } = {}

        -- skip newline at start
        i += 1

        -- `<=` so the final token of the input still belongs to the block
        while i <= len do
            if tokens[i].kind == "NEWLINE" then
                -- check the next few tokens to see how far the line is indented
                local blockIndent = 0
                local j = i + 1
                while j <= len and tokens[j].kind == "INDENT" do
                    blockIndent += 1
                    j += 1
                end
                if blockIndent <= currentIndent then
                    break
                end
            end
            table.insert(blockTokens, tokens[i])
            i += 1
        end

        return blockTokens
    end

    -- Collects the remainder of the current line. Expects `i` on the keyword
    -- (or operator). The token directly after it is skipped without being
    -- inspected, everything up to (not including) the next NEWLINE is
    -- returned, and `i` is left on that NEWLINE.
    local function getCond(): { Token }
        local condTokens: { Token } = {}

        -- skip the keyword
        i += 1

        -- get all tokens until the end of the line
        while i < len and tokens[i + 1].kind ~= "NEWLINE" do
            i += 1
            table.insert(condTokens, tokens[i])
        end

        -- skip the newline
        i += 1

        return condTokens
    end

    -- Returns the first non-SPACE token at or after `i`, and its index. The
    -- token is nil when only spaces (or nothing) remain.
    local function nextNonSpace(): (Token?, number)
        local j = i
        -- `<=` so a trailing SPACE is skipped instead of being returned
        while j <= len and tokens[j].kind == "SPACE" do
            j += 1
        end
        return tokens[j], j
    end

    -- Parses `condTokens` and requires the result to be exactly one expression.
    local function parseCond(condTokens: { Token }): Expr
        local cond = parse(condTokens)
        if #cond > 1 then
            error(colour.red "too many exprs in cond")
        elseif #cond < 1 then
            error(colour.red "not enough exprs in cond")
        end

        return cond[1]
    end

    -- Handles a NUMBER or STRING token at the start of an expression. It is
    -- either the left-hand side of a binop (next non-space token is an
    -- operator) or standalone. `makeLiteral` builds the node for the literal
    -- itself (NumberExpr or StringExpr).
    local function parseLiteral(token: Token, makeLiteral: (Token) -> Expr)
        -- skip the literal
        i += 1
        local nextToken, advance = nextNonSpace()
        i = advance

        if nextToken and binaryOperators[nextToken.value] then
            addExpr(BinOpExpr(
                token,
                makeLiteral(token),
                -- get condition tokens as rhs
                parseCond(getCond()),
                nextToken
            ))
        else
            addExpr(makeLiteral(token))
        end
    end

    while i < len do
        i += 1
        local token = tokens[i]

        if token.kind == "INDENT" or token.kind == "NEWLINE" then
            -- layout only; indentation is derived on demand by indentOf
        elseif token.kind == "KEYWORD" then
            -- the block helpers need the depth of the line the keyword is on
            currentIndent = indentOf(i)

            if token.value == "if" then
                addExpr(
                    IfExpr(
                        token,
                        parseCond(getCond()),
                        BlockExpr(token, parse(getBlock()))
                    )
                )
            elseif token.value == "elseif" then
                addExpr(
                    ElseIfExpr(
                        token,
                        parseCond(getCond()),
                        BlockExpr(token, parse(getBlock()))
                    )
                )
            elseif token.value == "else" then
                -- skip newline
                i += 1

                addExpr(ElseExpr(token, BlockExpr(token, parse(getBlock()))))
            elseif token.value == "loop" then
                -- skip newline
                i += 1

                addExpr(LoopExpr(token, BlockExpr(token, parse(getBlock()))))
            else
                print(token)
                error(colour.red "unknown token value " .. token.value)
            end
        elseif token.kind == "IDENTIFIER" then
            -- identifier is at the start of an expression, it could be:
            -- 1: a binop (next token is a text operator or operator);
            --    an assignment counts as a binop too
            -- 2: a postfix op (next token is ++ or --)
            -- 3: a function call
            -- 4: standalone

            -- skip the identifier
            i += 1
            local nextToken, advance = nextNonSpace()

            if not nextToken then
                -- standalone
                i = advance
                addExpr(IdentifierExpr(token))
            elseif binaryOperators[nextToken.value] then
                -- binop
                i = advance
                addExpr(BinOpExpr(
                    token,
                    IdentifierExpr(token),
                    -- get condition tokens as rhs
                    parseCond(getCond()),
                    nextToken
                ))
            elseif postfixOperators[nextToken.value] then
                -- postfix
                error "unimplemented"
            else
                i -= 1 -- getCond skips the identifier
                addExpr(FunctionCallExpr(token, token, parseCond(getCond())))
            end
        elseif token.kind == "NUMBER" then
            -- NumberExpr, not IdentifierExpr: the latter rejects NUMBER tokens
            parseLiteral(token, NumberExpr)
        elseif token.kind == "STRING" then
            -- StringExpr, not IdentifierExpr: the latter rejects STRING tokens
            parseLiteral(token, StringExpr)
        elseif token.kind == "SPACE" or token.kind == "COMMENT" then
            -- nothing to parse
        else
            print(token)
            error(colour.red "unknown token kind " .. token.kind)
        end
    end

    return program
end
|
|
|
|
-- Splits source text into tokens.
--
-- @param source the program text as an array of single-character strings
-- @return the tokens in source order; each records the line and column it
--         starts at (a tab that is lexed as INDENT counts as four columns)
--
-- Prints a message and calls `exit(1)` on an unclosed string literal, on an
-- identifier that runs to the very end of the input, and on a character that
-- cannot start any token.
local function lex(source: { string }): { Token }
    local tokens: { Token } = {}
    local sourceLen = #source
    local line, column = 1, 0

    local function addToken(
        kind: TokenKind,
        value: string,
        newLine: number?,
        newColumn: number?
    )
        table.insert(tokens, {
            kind = kind,
            value = value,
            line = newLine or line,
            column = newColumn or column,
        })
    end

    local function isDigit(c: string): boolean
        return c >= "0" and c <= "9"
    end

    local function isLetter(c: string): boolean
        return c >= "a" and c <= "z" or c >= "A" and c <= "Z"
    end

    -- Characters that always form a one-character token of a fixed kind.
    local simpleKinds: { [string]: TokenKind } = {
        ["="] = "EQUALS",
        [" "] = "SPACE",
        ["*"] = "TIMES",
        ["/"] = "DIVIDE",
        ["%"] = "MODULO",
    }

    -- "+" and "-" may stand alone, be doubled, or be followed by "=".
    local signKinds: { [string]: { single: TokenKind, double: TokenKind, assign: TokenKind } } = {
        ["+"] = { single = "PLUS", double = "PLUSPLUS", assign = "PLUSEQUALS" },
        ["-"] = { single = "MINUS", double = "MINUSMINUS", assign = "MINUSEQUALS" },
    }

    local i = 0
    while i < sourceLen do
        i += 1
        local char = source[i]
        column += 1

        local simpleKind = simpleKinds[char]
        local sign = signKinds[char]

        if simpleKind then
            addToken(simpleKind, char)
        elseif char == "\n" then
            addToken("NEWLINE", "\n")
            line += 1
            column = 0
        elseif char == "\t" then
            -- A tab is indentation only at the start of a line, i.e. directly
            -- after a NEWLINE, after another INDENT, or as the very first
            -- character of the input (no previous token exists then).
            local previous = tokens[#tokens]
            if
                previous == nil
                or previous.kind == "NEWLINE"
                or previous.kind == "INDENT"
            then
                addToken("INDENT", "\t")
                column += 3
            else
                addToken("SPACE", "\t")
            end
        elseif char == ";" then
            -- comment: everything after the semicolon up to the end of the line
            local first = i + 1
            local stop = first
            while stop <= sourceLen and source[stop] ~= "\n" do
                stop += 1
            end
            addToken("COMMENT", table.concat(source, "", first, stop - 1), line, column)
            -- leave i / column on the last character consumed so the loop
            -- header steps onto the newline with the right column
            column += stop - first
            i = stop - 1
        elseif char == '"' then
            local startLine, startColumn = line, column

            -- find the closing quote
            local first = i + 1
            local stop = first
            while stop <= sourceLen and source[stop] ~= '"' do
                stop += 1
            end
            local stringLiteral = table.concat(source, "", first, stop - 1)

            if stop > sourceLen then
                print(colour.red "unclosed string literal", stringLiteral)
                exit(1)
            end

            addToken("STRING", stringLiteral, startLine, startColumn)
            -- leave i / column on the closing quote
            column += stop - i
            i = stop
        elseif sign then
            -- check if it's doubled (++ / --), an assignment (+= / -=) or alone
            local following = source[i + 1]
            if following == char then
                addToken(sign.double, char .. char)
                i += 1
                column += 1
            elseif following == "=" then
                addToken(sign.assign, char .. "=")
                i += 1
                column += 1
            else
                addToken(sign.single, char)
            end
        elseif isDigit(char) then
            local startLine, startColumn = line, column

            -- keep going until we hit a non-digit
            local stop = i
            while stop < sourceLen and isDigit(source[stop + 1]) do
                stop += 1
            end

            addToken("NUMBER", table.concat(source, "", i, stop), startLine, startColumn)
            column += stop - i
            i = stop
        elseif isLetter(char) then
            local startLine, startColumn = line, column

            -- keep going until we hit something that is neither letter nor digit
            local stop = i
            while
                stop < sourceLen
                and (isLetter(source[stop + 1]) or isDigit(source[stop + 1]))
            do
                stop += 1
            end

            if stop == sourceLen then
                -- you can't end a program with an identifier
                print(colour.red "cant end program with identifier")
                exit(1)
            end

            local word = table.concat(source, "", i, stop)
            column += stop - i
            i = stop

            -- text operators take priority over keywords, then plain identifiers
            local kind: TokenKind = "IDENTIFIER"
            if textOperators[word] then
                kind = "TEXTOPERATOR"
            elseif keywords[word] then
                kind = "KEYWORD"
            end
            addToken(kind, word, startLine, startColumn)
        else
            print(
                colour.red "that isnt a valid character",
                colour.yellow(char)
            )
            exit(1)
        end
    end

    return tokens
end
|
|
|
|
-- Command-line driver: validates the target path given as the first process
-- argument, reads and normalises the file, lexes it, prints a table of the
-- tokens, then parses them and prints the resulting program.
local function main()
    local args = process.args
    if #args < 1 then
        print(colour.red "No target file specified!")
        print(colour.blue "Run 'melt-script help' for more information.")
        exit(1)
    end
    local target = args[1]

    local info = fs.metadata(target)
    if not info.exists then
        print(
            colour.red "Target file",
            colour.bold(target),
            colour.red "does not exist!"
        )
        exit(1)
    end
    if info.kind == "dir" then
        print(
            colour.bold(target),
            colour.red "is a directory, please choose a file to compile!"
        )
        exit(1)
    end

    local raw = fs.readFile(target)
    -- normalise line endings: \r\n becomes \n
    local unixText = string.gsub(raw, "\r\n", "\n")
    -- drop newlines at the very end of the text
    local trimmed = string.gsub(unixText, "\n+$", "")

    -- the lexer works on an array of single characters
    local tokens = lex(string.split(trimmed, ""))

    -- Right-pads `text` with spaces up to `width` characters.
    local function pad(text: string, width: number): string
        return text .. string.rep(" ", width - #text)
    end

    -- print the tokens as a table; spaces are omitted and every newline
    -- becomes a horizontal rule
    for _, token in tokens do
        local kind = token.kind
        if kind ~= "SPACE" then
            if kind == "NEWLINE" then
                print "────────────────┼───────────────┼─────────────────────────────"
            else
                print(
                    pad(`{target}:{token.line}:{token.column}`, 15),
                    "│",
                    pad(colour.yellow(kind), 22),
                    "│",
                    colour.purple(token.value)
                )
            end
        end
    end

    local program = parse(tokens)
    -- local out = generate(program)

    print(program)

    -- print(out)
end
|
|
|
|
-- Script entry point: run the driver defined above.
main()
|