-- melt-script front end: lexer and parser, run with the Lune runtime.
-- (Listing metadata from the original file view: 609 lines, 12 KiB, plaintext.)
local fs = require "@lune/fs"
|
|
local process = require "@lune/process"
|
|
|
|
local exit = process.exit
|
|
local colour = require "colour"
|
|
|
|
-- Token kinds. Every constant's value is simply its own name, which keeps
-- printed tokens readable.

-- Layout
local INDENT, SPACE, NEWLINE = "INDENT", "SPACE", "NEWLINE"

-- Literals
local IDENTIFIER, NUMBER, COMMENT, STRING, KEYWORD =
	"IDENTIFIER", "NUMBER", "COMMENT", "STRING", "KEYWORD"

-- Operators
local TEXTOPERATOR, EQUALS = "TEXTOPERATOR", "EQUALS"
local PLUS, PLUSPLUS, PLUSEQUALS = "PLUS", "PLUSPLUS", "PLUSEQUALS"
local MINUS, MINUSMINUS, MINUSEQUALS = "MINUS", "MINUSMINUS", "MINUSEQUALS"
local TIMES, DIVIDE, MODULO = "TIMES", "DIVIDE", "MODULO"

-- Disabled (commented-out) token kinds:
-- OPEN_BRACE = "OPEN_BRACE"
-- CLOSE_BRACE = "CLOSE_BRACE"
|
|
|
|
-- Set of reserved words (word -> true). The lexer emits a KEYWORD token for
-- any word found here that is not already a text operator.
local keywords = {}
for _, word in ipairs({ "if", "elseif", "else", "loop", "for", "break", "continue" }) do
	keywords[word] = true
end
|
|
|
|
-- Set of operators that are spelled as words (word -> true). The lexer checks
-- this set before `keywords` and emits a TEXTOPERATOR token on a match.
local textOperators = {}
for _, word in ipairs({ "is", "and", "or", "not" }) do
	textOperators[word] = true
end
|
|
|
|
-- Set of token values the parser accepts as the operator of a binary
-- operation (value -> true). Assignment (`=`, `+=`, `-=`) is included because
-- the parser models assignment as a binary operation.
local binaryOperators = {}
for _, symbol in ipairs({
	-- word operators
	"is", "and", "or", "not",
	-- assignment and arithmetic
	"=", "+", "+=", "-", "-=", "*", "/", "%",
}) do
	binaryOperators[symbol] = true
end
|
|
|
|
-- Set of token values the parser treats as postfix operators (value -> true).
local postfixOperators = { ["++"] = true, ["--"] = true }
|
|
|
|
-- A lexical token.
--   kind   one of the token-kind constants ("IDENTIFIER", "PLUS", ...)
--   value  the token's text
--   line   1-based line the token starts on
--   column 1-based column the token starts at
type Token = { kind: string, value: string, line: number, column: number }
|
|
|
|
-- Fields shared by every AST node: `kind` names the node type ("if", "binop",
-- ...) and `startToken` is the token the node was created from.
type Expr = { kind: string, startToken: Token }
|
|
|
|
-- AST node holding the list of expressions that make up a block.
type BlockExpr = Expr & { expressions: { Expr } }

-- Create a "block" node.
--   startToken   token the block is attributed to
--   expressions  the block's expressions, in source order
local function BlockExpr(startToken: Token, expressions: { Expr }): BlockExpr
	local node = {
		kind = "block",
		startToken = startToken,
		expressions = expressions,
	}
	return node
end
|
|
|
|
-- AST node for an `if` statement: a condition and the block it guards.
type IfExpr = Expr & { condition: Expr, block: BlockExpr }

-- Create an "if" node.
--   startToken  the `if` keyword token
--   condition   the parsed condition expression
--   block       the body of the statement
local function IfExpr(startToken: Token, condition: Expr, block: BlockExpr): IfExpr
	local node = {
		kind = "if",
		startToken = startToken,
		condition = condition,
		block = block,
	}
	return node
end
|
|
|
|
-- AST node for an `elseif` clause: a condition and the block it guards.
type ElseIfExpr = Expr & { condition: Expr, block: BlockExpr }

-- Create an "elseif" node.
--   startToken  the `elseif` keyword token
--   condition   the parsed condition expression
--   block       the body of the clause
local function ElseIfExpr(startToken: Token, condition: Expr, block: BlockExpr): ElseIfExpr
	local node = {
		kind = "elseif",
		startToken = startToken,
		condition = condition,
		block = block,
	}
	return node
end
|
|
|
|
-- AST node for an `else` clause and its body.
-- `block` is typed as BlockExpr (not the wider Expr) so the type agrees with
-- the constructor below and with IfExpr / ElseIfExpr.
type ElseExpr = Expr & {
	block: BlockExpr,
}

-- Create an "else" node.
--   startToken  the `else` keyword token
--   block       the body of the clause
local function ElseExpr(startToken: Token, block: BlockExpr): ElseExpr
	return {
		startToken = startToken,
		kind = "else",
		block = block,
	}
end
|
|
|
|
-- AST node for a binary operation: two operands and the operator token
-- between them.
type BinOpExpr = Expr & { left: Expr, right: Expr, operator: Token }

-- Create a "binop" node.
--   startToken  token the expression starts at
--   left        left-hand operand
--   right       right-hand operand
--   operator    the operator token
local function BinOpExpr(startToken: Token, left: Expr, right: Expr, operator: Token): BinOpExpr
	local node = {
		kind = "binop",
		startToken = startToken,
		operator = operator,
		left = left,
		right = right,
	}
	return node
end
|
|
|
|
-- AST node for a postfix operation: an operand followed by an operator token.
type PostfixOpExpr = Expr & {
	expr: Expr,
	operator: Token,
}

-- Create a "postfix" node.
--   startToken  token the expression starts at
--   expr        the operand
--   operator    the postfix operator token
-- The return type is now annotated, matching every other node constructor.
local function PostfixOpExpr(startToken: Token, expr: Expr, operator: Token): PostfixOpExpr
	return {
		startToken = startToken,
		kind = "postfix",
		expr = expr,
		operator = operator,
	}
end
|
|
|
|
-- AST node for a function call: the name token and a single argument
-- expression.
type FunctionCallExpr = Expr & { name: Token, arg: Expr }

-- Create a "functioncall" node.
--   startToken  token the call starts at
--   name        token naming the function
--   arg         the argument expression
local function FunctionCallExpr(startToken: Token, name: Token, arg: Expr): FunctionCallExpr
	local node = {
		kind = "functioncall",
		startToken = startToken,
		name = name,
		arg = arg,
	}
	return node
end
|
|
|
|
-- AST node for a bare identifier. It carries no fields beyond Expr; the name
-- is available as `startToken.value`.
type IdentifierExpr = Expr

-- Create an "identifier" node from the identifier's token.
local function IdentifierExpr(startToken: Token): IdentifierExpr
	local node = { kind = "identifier", startToken = startToken }
	return node
end
|
|
|
|
-- Parse a flat token list into a list of expression nodes.
--
-- Supported input:
--   * `if <cond>` / `elseif <cond>` / `else`, each followed by a block made of
--     the following lines that are indented deeper than the header line;
--   * identifier-led expressions: `<id> <binary-op> <rest of line>` (assignment
--     is modelled as a binary operation), `<id>++` / `<id>--`, and a lone
--     identifier at the end of the token list.
-- INDENT, NEWLINE and SPACE tokens between expressions are skipped. Any other
-- token kind (NUMBER, STRING, COMMENT, ...) raises "unknown token kind".
--
-- Parameters:
--   tokens  tokens as produced by `lex`
-- Returns: the parsed expressions, in source order.
-- Raises: on an unknown keyword or token kind, and when a condition or
-- right-hand side does not parse to exactly one expression.
local function parse(tokens: { Token }): { Expr }
	local program: { Expr } = {}
	local len = #tokens
	-- Index of the token currently being looked at; shared with the helpers
	-- below, which advance it as they consume tokens.
	local i = 0

	local function addExpr(expr: Expr)
		table.insert(program, expr)
	end

	-- Number of INDENT tokens at the start of the line containing
	-- tokens[index]. Computed on demand so the result cannot go stale when the
	-- helpers below move `i` across line boundaries.
	local function indentOf(index: number): number
		local lineStart = index
		while lineStart > 1 and tokens[lineStart - 1].kind ~= NEWLINE do
			lineStart -= 1
		end

		local depth = 0
		while lineStart <= len and tokens[lineStart].kind == INDENT do
			depth += 1
			lineStart += 1
		end
		return depth
	end

	-- Advance `i` past any SPACE tokens and return the token it lands on, or
	-- nil when the token list is exhausted.
	local function nextNonSpace(): Token?
		while i <= len and tokens[i].kind == SPACE do
			i += 1
		end
		return tokens[i]
	end

	-- With `i` on a keyword or operator, collect the rest of that line.
	-- Whitespace directly after the keyword/operator is skipped. On return `i`
	-- rests on the NEWLINE that ended the line, or on len + 1 if there was none.
	local function getCond(): { Token }
		local condTokens: { Token } = {}

		i += 1 -- step past the keyword / operator
		nextNonSpace()

		while i <= len and tokens[i].kind ~= NEWLINE do
			table.insert(condTokens, tokens[i])
			i += 1
		end

		return condTokens
	end

	-- With `i` on the NEWLINE that ends a block header, collect every token of
	-- the following lines that are indented deeper than `headerIndent`. The
	-- block ends at the first NEWLINE followed by `headerIndent` or fewer INDENT
	-- tokens (a blank line therefore also ends it). On return `i` rests on that
	-- NEWLINE, or on len + 1 when the block runs to the end of the input.
	local function getBlock(headerIndent: number): { Token }
		local blockTokens: { Token } = {}

		i += 1 -- skip the NEWLINE that ends the header line

		while i <= len do
			if tokens[i].kind == NEWLINE then
				-- look ahead: how deeply is the next line indented?
				local blockIndent = 0
				local j = i + 1
				while j <= len and tokens[j].kind == INDENT do
					blockIndent += 1
					j += 1
				end
				if blockIndent <= headerIndent then
					break
				end
			end
			table.insert(blockTokens, tokens[i])
			i += 1
		end

		return blockTokens
	end

	-- Parse `condTokens` and require that they form exactly one expression.
	local function parseCond(condTokens: { Token }): Expr
		local cond = parse(condTokens)
		if #cond > 1 then
			error(colour.red "too many exprs in cond")
		elseif #cond < 1 then
			error(colour.red "not enough exprs in cond")
		end

		return cond[1]
	end

	while i < len do
		i += 1
		local token = tokens[i]

		if token.kind == INDENT or token.kind == NEWLINE or token.kind == SPACE then
			-- Layout tokens produce no expression. Indentation is read through
			-- indentOf() when a block header is found.
		elseif token.kind == KEYWORD then
			if token.value == "if" then
				local headerIndent = indentOf(i)
				local condition = parseCond(getCond())
				addExpr(IfExpr(token, condition, BlockExpr(token, parse(getBlock(headerIndent)))))
			elseif token.value == "elseif" then
				local headerIndent = indentOf(i)
				local condition = parseCond(getCond())
				addExpr(ElseIfExpr(token, condition, BlockExpr(token, parse(getBlock(headerIndent)))))
			elseif token.value == "else" then
				local headerIndent = indentOf(i)

				-- step onto the NEWLINE that ends the `else` line
				i += 1

				addExpr(ElseExpr(token, BlockExpr(token, parse(getBlock(headerIndent)))))
			else
				print(token)
				error(colour.red "unknown token value " .. token.value)
			end
		elseif token.kind == IDENTIFIER then
			-- An identifier at the start of an expression is one of:
			--   * a lone identifier (nothing follows it),
			--   * a binary operation (next token is an operator or text
			--     operator; assignment counts as a binary operation),
			--   * a postfix operation (next token is ++ or --),
			--   * a function call (not implemented).

			-- skip the identifier
			i += 1
			local nextToken = nextNonSpace()

			if nextToken == nil then
				-- end of input, e.g. the right-hand side of `a is b`
				addExpr(IdentifierExpr(token))
			elseif binaryOperators[nextToken.value] then
				-- the rest of the line is the right-hand side
				addExpr(BinOpExpr(token, IdentifierExpr(token), parseCond(getCond()), nextToken))
			elseif postfixOperators[nextToken.value] then
				addExpr(PostfixOpExpr(token, IdentifierExpr(token), nextToken))
			else
				-- TODO: function call. Nothing is emitted yet: the identifier
				-- is dropped, and the token `i` rests on is skipped by the
				-- loop increment.
			end
		else
			print(token)
			error(colour.red "unknown token kind " .. token.kind)
		end
	end

	return program
end
|
|
|
|
-- Turn source text into a flat list of tokens.
--
-- Parameters:
--   source  the program as an array of single-character strings
-- Returns: the tokens in source order. SPACE, NEWLINE, INDENT and COMMENT
-- tokens are included. Token line and column numbers are 1-based.
-- On an invalid character, an unclosed string literal, or an identifier that
-- runs to the very end of the input, prints a message and calls exit(1).
local function lex(source: { string }): { Token }
	local tokens: { Token } = {}
	local line, column = 1, 0

	-- One past the index of the last character.
	local len = #source + 1
	local i = 0

	-- Append a token. Position defaults to the current line/column.
	local function addToken(kind: string, value: string, newLine: number?, newColumn: number?)
		table.insert(tokens, {
			kind = kind,
			value = value,
			line = newLine or line,
			column = newColumn or column,
		})
	end

	-- Append a two-character token that starts at the current position and
	-- consume its second character.
	local function addTwoCharToken(kind: string, value: string)
		addToken(kind, value)
		i += 1
		column += 1
	end

	-- Consume characters starting at source[i] for as long as `accept` holds,
	-- advancing `i` and `column` once per character. Returns the consumed text
	-- and leaves `i` on the first rejected character (or on `len`).
	local function takeWhile(accept: (string) -> boolean): string
		local first = i
		while i < len and accept(source[i]) do
			column += 1
			i += 1
		end
		return table.concat(source, "", first, i - 1)
	end

	local function isDigit(c: string): boolean
		return c >= "0" and c <= "9"
	end

	local function isLetter(c: string): boolean
		return (c >= "a" and c <= "z") or (c >= "A" and c <= "Z")
	end

	local function isWordChar(c: string): boolean
		return isLetter(c) or isDigit(c)
	end

	local function notNewline(c: string): boolean
		return c ~= "\n"
	end

	local function notQuote(c: string): boolean
		return c ~= '"'
	end

	while i < len - 1 do
		i += 1
		local char = source[i]
		column += 1

		if char == "=" then
			addToken(EQUALS, "=")
		elseif char == "\n" then
			addToken(NEWLINE, "\n")
			line += 1
			column = 0
		elseif char == " " then
			addToken(SPACE, " ")
		elseif char == "\t" then
			-- A tab is indentation only at the start of a line: directly after
			-- a NEWLINE, after another INDENT, or as the very first character
			-- of the input (no previous token).
			local previous = tokens[#tokens]
			if previous == nil or previous.kind == NEWLINE or previous.kind == INDENT then
				addToken(INDENT, "\t")
				-- an INDENT advances the column by 4 in total
				column += 3
			else
				addToken(SPACE, "\t")
			end
		elseif char == ";" then
			-- comment: everything after the semicolon up to the end of the line
			local startColumn = column
			i += 1 -- skip the semicolon
			column += 1
			local comment = takeWhile(notNewline)
			-- step back onto the last comment character so the main loop's
			-- increment lands on the newline
			column -= 1
			i -= 1
			addToken(COMMENT, comment, line, startColumn)
		elseif char == '"' then
			local startLine, startColumn = line, column

			column += 1
			i += 1 -- skip the opening quote
			local stringLiteral = takeWhile(notQuote)

			if i == len then
				print(colour.red "unclosed string literal", stringLiteral)
				exit(1)
			end

			-- `i` rests on the closing quote; the main loop steps past it
			addToken(STRING, stringLiteral, startLine, startColumn)
		elseif char == "+" then
			-- ++, += or plain +   (source[i + 1] is nil at the end of input)
			if source[i + 1] == "+" then
				addTwoCharToken(PLUSPLUS, "++")
			elseif source[i + 1] == "=" then
				addTwoCharToken(PLUSEQUALS, "+=")
			else
				addToken(PLUS, "+")
			end
		elseif char == "-" then
			-- --, -= or plain -
			if source[i + 1] == "-" then
				addTwoCharToken(MINUSMINUS, "--")
			elseif source[i + 1] == "=" then
				addTwoCharToken(MINUSEQUALS, "-=")
			else
				addToken(MINUS, "-")
			end
		elseif char == "*" then
			addToken(TIMES, "*")
		elseif char == "/" then
			addToken(DIVIDE, "/")
		elseif char == "%" then
			addToken(MODULO, "%")
		elseif isDigit(char) then
			local startLine, startColumn = line, column

			local number = takeWhile(isDigit)
			-- step back onto the last digit
			column -= 1
			i -= 1
			addToken(NUMBER, number, startLine, startColumn)
		elseif isLetter(char) then
			local startLine, startColumn = line, column

			-- a word starts with a letter and continues with letters or digits
			local word = takeWhile(isWordChar)

			if i == len then
				-- you can't end a program with an identifier
				-- NOTE(review): this also rejects input that ends in a keyword
				-- or text operator; confirm that is intended.
				print(colour.red "cant end program with identifier")
				exit(1)
			end

			-- step back onto the last character of the word
			column -= 1
			i -= 1

			-- text operators take precedence over keywords
			local kind = IDENTIFIER
			if textOperators[word] then
				kind = TEXTOPERATOR
			elseif keywords[word] then
				kind = KEYWORD
			end
			addToken(kind, word, startLine, startColumn)
		else
			print(colour.red "that isnt a valid character", colour.yellow(char))
			exit(1)
		end
	end

	return tokens
end
|
|
|
|
-- Entry point. Takes the target file path from the first command-line
-- argument, lexes the file, prints a table of its tokens, then parses the
-- tokens and prints the resulting program. Prints a message and exits with
-- status 1 when no argument is given, the target does not exist, or the
-- target is a directory.
local function main()
	local args = process.args
	if #args < 1 then
		print(colour.red "No target file specified!")
		print(colour.blue "Run 'melt-script help' for more information.")
		exit(1)
	end

	local target = args[1]
	local info = fs.metadata(target)

	if not info.exists then
		print(colour.red "Target file", colour.bold(target), colour.red "does not exist!")
		exit(1)
	end
	if info.kind == "dir" then
		print(colour.bold(target), colour.red "is a directory, please choose a file to compile!")
		exit(1)
	end

	-- Normalise the text: \r\n becomes \n, and trailing newlines are removed.
	-- (Assigning to a single local keeps only gsub's first result.)
	local contents = fs.readFile(target)
	local unixText = string.gsub(contents, "\r\n", "\n")
	local trimmed = string.gsub(unixText, "\n+$", "")

	-- the lexer works on an array of single characters
	local tokens = lex(string.split(trimmed, ""))

	-- right-pad `str` with spaces up to `width` characters
	local function pad(str: string, width: number): string
		return str .. string.rep(" ", width - #str)
	end

	-- Token table: one row per token, a rule for every newline, spaces omitted.
	for _, token in tokens do
		if token.kind == NEWLINE then
			print "────────────────┼───────────────┼─────────────────────────────"
		elseif token.kind ~= SPACE then
			print(
				pad(`{target}:{token.line}:{token.column}`, 15),
				"│",
				pad(colour.yellow(token.kind), 22),
				"│",
				colour.purple(token.value)
			)
		end
	end

	local program = parse(tokens)
	-- local out = generate(program)

	print(program)

	-- print(out)
end

main()
|