Begin parser rewrite, overhaul language to make more functional

2024-04-05 06:40:47 +01:00 · 2024-04-05 06:40:47 +01:00 · 9b2c5b4870
parent 6a41028640
commit 9b2c5b4870
3 changed files with 238 additions and 321 deletions
--- a/Script/colour.luau
+++ b/Script/colour.luau
@ -15,35 +15,35 @@ local reset = style "reset"
 local Colour = {}
-function Colour.blue(str: string)
+function Colour.blue(str: string | number)
 	return blue .. str .. reset
 end
-function Colour.green(str: string)
+function Colour.green(str: string | number)
 	return green .. str .. reset
 end
-function Colour.purple(str: string)
+function Colour.purple(str: string | number)
 	return purple .. str .. reset
 end
-function Colour.red(str: string)
+function Colour.red(str: string | number)
 	return red .. str .. reset
 end
-function Colour.yellow(str: string)
+function Colour.yellow(str: string | number)
 	return yellow .. str .. reset
 end
-function Colour.cyan(str: string)
+function Colour.cyan(str: string | number)
 	return cyan .. str .. reset
 end
-function Colour.bold(str: string)
+function Colour.bold(str: string | number)
 	return bold .. str .. reset
 end
-function Colour.dim(str: string)
+function Colour.dim(str: string | number)
 	return dim .. str .. reset
 end
--- a/Script/main.luau
+++ b/Script/main.luau
@ -6,6 +6,7 @@ local colour = require "colour"
 type TokenKind =
 	"INDENT"
 	| "DEDENT"
 	| "SPACE"
 	| "NEWLINE"
 	| "IDENTIFIER"
@ -15,19 +16,23 @@ type TokenKind =
 	| "TEXTOPERATOR"
 	| "EQUALS"
 	| "PLUS"
 	| "PLUSPLUS"
 	| "PLUSEQUALS"
 	| "MINUS"
 	| "MINUSMINUS"
 	| "MINUSEQUALS"
 	| "TIMES"
 	| "DIVIDE"
 	| "MODULO"
 	| "EXPONENT"
 	| "COLON"
 	| "SEMICOLON"
 	| "LPAREN"
 	| "RPAREN"
 	| "LBRACE"
 	| "RBRACE"
 	| "LBRACKET"
 	| "RBRACKET"
 type ExprKind =
 	"block"
 	| "if"
 	| "elseif"
 	| "else"
 	| "binop"
 	| "postfix"
@ -38,7 +43,6 @@ type ExprKind =
 local keywords = {
 	["if"] = true,
 	["elseif"] = true,
 	["else"] = true,
 }
@ -56,19 +60,12 @@ local binaryOperators = {
 	["not"] = true,
 	["="] = true,
 	["+"] = true,
 	["+="] = true,
 	["-"] = true,
 	["-="] = true,
 	["*"] = true,
 	["/"] = true,
 	["%"] = true,
 }
 local postfixOperators = {
 	["++"] = true,
 	["--"] = true,
 }
 type Token = {
 	kind: TokenKind,
 	value: string,
@ -93,57 +90,24 @@ local function BlockExpr(startToken: Token, expressions: { Expr }): BlockExpr
 	}
 end
 type ElseExpr = Expr & {
 	block: Expr,
 }
 local function ElseExpr(startToken: Token, block: BlockExpr): ElseExpr
 	return {
 		startToken = startToken,
 		kind = "else" :: ExprKind,
 		block = block,
 	}
 end
 type ElseIfExpr = Expr & {
 	condition: Expr,
 	block: BlockExpr,
 	next: (ElseIfExpr | ElseExpr)?,
 }
 local function ElseIfExpr(
 	startToken: Token,
 	condition: Expr,
 	block: BlockExpr,
 	next: (ElseIfExpr | ElseExpr)?
 ): ElseIfExpr
 	return {
 		startToken = startToken,
 		kind = "elseif" :: ExprKind,
 		condition = condition,
 		block = block,
 		next = next,
 	}
 end
 type IfExpr = Expr & {
 	condition: Expr,
-	block: BlockExpr,
+	ifBlock: BlockExpr,
-	next: (ElseIfExpr | ElseExpr)?,
+	elseBlock: BlockExpr,
 }
 local function IfExpr(
 	startToken: Token,
 	condition: Expr,
-	block: BlockExpr,
+	ifBlock: BlockExpr,
-	next: (ElseIfExpr | ElseExpr)?
+	elseBlock: BlockExpr
 ): IfExpr
 	return {
 		startToken = startToken,
 		kind = "if" :: ExprKind,
 		condition = condition,
-		block = block,
+		ifBlock = ifBlock,
-		next = next,
+		elseBlock = elseBlock,
 	}
 end
@ -315,9 +279,12 @@ local function generate(program: { Expr }): string
 			block ..= generate { ifexpr.condition }
 			block ..= " then\n"
-			block ..= indent(generate { ifexpr.block }, 1)
+			block ..= indent(generate { ifexpr.ifBlock }, 1)
 			block ..= "\n"
 			block ..= "else\n"
 			block ..= indent(generate { ifexpr.elseBlock }, 1)
 			block ..= "end"
 			output ..= indent(block, 1)
@ -335,19 +302,6 @@ local function generate(program: { Expr }): string
 			end
 			output ..= "return "
 			output ..= generate { block.expressions[b + 1] }
 		elseif kind == "postfix" then
 			local postfix = expr :: PostfixOpExpr
 			output ..= generate { postfix.expr }
 			local value = postfix.operator.value
 			if value == "++" then
 				output ..= " += 1\n"
 			elseif value == "--" then
 				output ..= " -= 1\n"
 			else
 				error(`unknown postfix operator {value}`)
 			end
 		else
 			error(`unknown expr kind {kind}`)
 		end
@ -356,6 +310,45 @@ local function generate(program: { Expr }): string
 	return output
 end
 local printIndent = 0
 local function printToken(token: Token)
 	local pos = `{token.line}:{token.column}`
 	while #pos < 5 do
 		pos ..= " "
 	end
 	local kind = token.kind
 	while #kind < 13 do
 		kind ..= " "
 	end
 	local value = token.value
 	if token.kind == "STRING" then
 		value = colour.green(`"{value}"`)
 	elseif token.kind == "NUMBER" then
 		value = colour.yellow(value)
 	elseif token.kind == "IDENTIFIER" then
 		value = colour.cyan(value)
 	elseif token.kind == "KEYWORD" then
 		value = colour.red(value)
 	elseif token.kind == "INDENT" then
 		value = "{"
 		printIndent += 1
 	elseif token.kind == "DEDENT" then
 		value = "}"
 		printIndent -= 1
 	elseif token.kind == "NEWLINE" or token.kind == "SPACE" then
 		value = ""
 	end
 	for _ = 1, printIndent - if token.kind == "INDENT" then 1 else 0 do
 		value = "  " .. value
 	end
 	print(pos, colour.blue(kind), colour.bold(value))
 end
 local function parse(tokens: { Token }): { Expr }
 	local program: { Expr } = {}
@ -363,228 +356,116 @@ local function parse(tokens: { Token }): { Expr }
 		error(colour.red "no tokens to parse")
 	end
-	local function addExpr(expr: Expr)
+	-- remove spaces and newlines
-		table.insert(program, expr)
+	for i, token in tokens do
 		if token.kind == "SPACE" then
 			table.remove(tokens, i)
 		end
 	end
-	local i = 0
+	-- A program is a list of expressions
 	local len = #tokens
 	while i < len do
 		i += 1
 		local token = tokens[i]
 		local currentIndent = 0
-		local function getBlock(): { Token }
+	local function next(): Token
-			-- get tokens until the end of the block (which is the same indent level as the if statement)
+		return tokens[1]
-			local blockTokens: { Token } = {}
+	end
 			local blockIndent = 0
-			if #tokens == 0 then
+	local function get(): Token
-				error(colour.red "tried to get empty block")
+		local token = next()
 		table.remove(tokens, 1)
 		return token
 	end
 	local function eat(kind: TokenKind): Token
 		local token = get()
 		if token.kind ~= kind then
 			print(
 				colour.red "expected",
 				colour.yellow(kind),
 				colour.red "got",
 				colour.yellow(token.kind)
 			)
 			exit(1)
 		end
 		return token
 	end
 	local function canEndAnExpression(token: Token): boolean
 		local kind: TokenKind = token.kind
 		return kind == "IDENTIFIER"
 			or kind == "NUMBER"
 			or kind == "STRING"
 			or kind == "RPAREN"
 			or kind == "RBRACE"
 			or kind == "RBRACKET"
 	end
 	local function getIfExprCond(): { Token }
 		local tokens: { Token } = {}
 		local depth = 0
 		while true do
 			local token = get()
 			print("got token", token)
 			if token.kind == "COLON" and depth == 0 then
 				break
 			elseif token.kind == "KEYWORD" and token.value == "if" then
 				-- keywords that require a colon
 				depth += 1
 			end
-
+			table.insert(tokens, token)
 			while i < len do -- todo figure out if the + 1 breaks something or not its 5:57 am idck
 				if tokens[i].kind == "NEWLINE" then
 					blockIndent = 0
 					-- check the next few tokens to see if they're indented
 					local j = i + 1
 					while j < len and tokens[j].kind == "INDENT" do
 						blockIndent += 1
 						j += 1
 					end
 					if blockIndent <= currentIndent then
 						print "block finished"
 						break
 					end
 				end
 				table.insert(blockTokens, tokens[i])
 				i += 1
 			end
 			if i >= len then
 				print "welp"
 			end
 			if #blockTokens == 0 then
 				error(colour.red "empty block")
 			end
 			print(blockTokens)
 			return blockTokens
 		end
-		local function getCond(): { Token }
+		return tokens
-			local condTokens: { Token } = {}
+	end
-			-- get all tokens until the end of the line
+	local function getUntilEndOfExpression(): { Token }
-			while i < len and tokens[i + 1].kind ~= "NEWLINE" do
+		local tokens: { Token } = {}
-				i += 1
+
-				table.insert(condTokens, tokens[i])
+		-- just because a token can end an expression doesn't mean it does
 		local startToken = next()
 		if startToken.kind == "KEYWORD" then
 			if startToken.value == "if" then
 				-- skip the if keyword
 				get()
 				-- first get the condition
 				local conditionTokens = getIfExprCond()
 				print("Got tokns", conditionTokens)
 			end
-
+		else
-			return condTokens
+			print(
 				colour.red "unimplemented token",
 				colour.yellow(startToken.kind)
 			)
 			exit(1)
 		end
-		local function nextNonSpace(): (Token, number)
+		return tokens
-			local j = i
+	end
-			while j < len and tokens[j].kind == "SPACE" do
+
-				j += 1
+	while #tokens > 0 do
 		local token = get()
 		printToken(token)
 		if token.kind == "IDENTIFIER" then
 			local nextToken = get()
 			if binaryOperators[nextToken.value] then
 				-- binary operator
 				local left = IdentifierExpr(token)
 				local operator = nextToken
 				local rightTokens = getUntilEndOfExpression()
 				local right = parse(rightTokens)[1]
 				print(operator)
 				table.insert(program, BinOpExpr(token, left, right, operator))
 			end
-			return tokens[j], j
+		else
-		end
+			print(colour.red "unexpected token", colour.yellow(token.kind))
-
+			exit(1)
 		local function parseCond(condTokens: { Token }): Expr
 			print("parsing cond", condTokens)
 			error("bruh")
 			local cond = parse(condTokens)
 			if #cond > 1 then
 				error(colour.red "too many exprs in cond")
 			elseif #cond < 1 then
 				error(colour.red "not enough exprs in cond")
 			end
 			return cond[1]
 		end
 		if token.kind == "INDENT" then
 			currentIndent += 1
 		elseif token.kind == "NEWLINE" then
 			currentIndent = 0
 		elseif token.kind == "KEYWORD" then
 			if token.value == "if" then
 				print(i)
 				local cond = getCond()
 				print(i)
 				local block = getBlock()
 				local expr = IfExpr(
 					token,
 					parseCond(cond),
 					BlockExpr(token, parse(block))
 				)
 				addExpr(expr)
 				-- elseif token.value == "elseif" then
 				-- 	local cond = getCond()
 				-- 	-- skip the newline
 				-- 	i += 1
 				-- 	local block = getBlock()
 				-- 	addExpr(
 				-- 		ElseIfExpr(
 				-- 			token,
 				-- 			parseCond(cond),
 				-- 			BlockExpr(token, parse(block))
 				-- 		)
 				-- 	)
 				-- elseif token.value == "else" then
 				-- 	-- skip newline
 				-- 	i += 1
 				-- 	local block = getBlock()
 				-- 	addExpr(ElseExpr(token, BlockExpr(token, parse(block))))
 				-- else
 				-- 	print(token)
 				-- 	error(colour.red "unknown token value " .. token.value)
 			end
 		elseif token.kind == "IDENTIFIER" then
 			-- identifier is at the start of an expression, it could be:
 			-- 1: a binop (next token is a text operator or an operator)
 			-- 2: a postfix op (next token is ++ or --)
 			-- 3: a function call
 			-- 4: standalone
 			-- after one 2am philosophical compiler thinking session, I've concluded that yes, an assignment is indeed a binop
 			-- skip the identifier
 			i += 1
 			local nextToken, advance = nextNonSpace()
 			if not nextToken then
 				-- standalone
 				i = advance
 				addExpr(IdentifierExpr(token))
 			elseif binaryOperators[nextToken.value] then
 				-- binop
 				i = advance
 				local cond = getCond()
 				addExpr(BinOpExpr(
 					token,
 					IdentifierExpr(token),
 					-- get condition tokens as rhs
 					parseCond(cond),
 					nextToken
 				))
 			elseif postfixOperators[nextToken.value] then
 				-- postfix
 				i = advance
 				addExpr(PostfixOpExpr(token, IdentifierExpr(token), nextToken))
 			else
 				i -= 1 -- getCond skips the identifier
 				local cond = getCond()
 				addExpr(FunctionCallExpr(token, token, parseCond(cond)))
 			end
 		elseif token.kind == "NUMBER" then
 			-- number is at the start of an expression, it could be:
 			-- 1: a binop (next token is a text operator or an operator)
 			-- 2: standalone
 			-- skip the number
 			i += 1
 			local nextToken, advance = nextNonSpace()
 			local function standalone()
 				i = advance
 				addExpr(NumberExpr(token))
 			end
 			if not nextToken or not binaryOperators[nextToken.value] then
 				standalone()
 			else
 				-- binop
 				i = advance
 				local cond = getCond()
 				addExpr(BinOpExpr(
 					token,
 					NumberExpr(token),
 					-- get condition tokens as rhs
 					parseCond(cond),
 					nextToken
 				))
 			end
 		elseif token.kind == "STRING" then
 			-- string is at the start of an expression, it could be:
 			-- 1: a binop (next token is a text operator or an operator)
 			-- 2: standalone
 			-- skip the string
 			i += 1
 			local nextToken, advance = nextNonSpace()
 			local function standalone()
 				i = advance
 				addExpr(StringExpr(token))
 			end
 			if not nextToken or not binaryOperators[nextToken.value] then
 				standalone()
 			else
 				-- binop
 				i = advance
 				addExpr(BinOpExpr(
 					token,
 					StringExpr(token),
 					-- get condition tokens as rhs
 					parseCond(getCond()),
 					nextToken
 				))
 			end
 		elseif token.kind ~= "SPACE" then
 			print(token)
 			error(colour.red "unknown token kind " .. token.kind)
 		end
 	end
@ -598,6 +479,7 @@ local function lex(source: { string }): { Token }
 		return tokens[#tokens - (n - 1)]
 	end
 	local line, column = 1, 0
 	local indent = 0
 	local function addToken(
 		kind: TokenKind,
@ -631,23 +513,28 @@ local function lex(source: { string }): { Token }
 			addToken("SPACE", " ")
 		elseif char == "\t" then
 			-- only if the last token is a newline or an indent
-			if last(1).kind == "NEWLINE" or last(1).kind == "INDENT" then
+			if last(1).kind ~= "NEWLINE" and last(1).kind ~= "INDENT" then
 				addToken("INDENT", "\t")
 				column += 3
 			else
 				addToken("SPACE", "\t")
 				continue
 			end
-		elseif char == ";" then
+
-			-- parse till end of line
+			-- count how many tabs there are
-			i += 1 -- skip the semicolon
+			local tabs = 1
-			while i < len and source[i] ~= "\n" do
+			while source[i + tabs] == "\t" do
-				column += 1
+				tabs += 1
 				i += 1
 			end
 			column -= 1
 			i -= 1
-			-- I used to do something with it here but nah
+			local diff = tabs - indent
 			for _ = 1, math.abs(diff) do
 				if diff > 0 then
 					addToken("INDENT", "\t")
 					indent += 1
 				else
 					addToken("DEDENT", "\t")
 					indent -= 1
 				end
 			end
 		elseif char == '"' then
 			local startLine, startColumn = line, column
@ -668,37 +555,33 @@ local function lex(source: { string }): { Token }
 			addToken("STRING", stringLiteral, startLine, startColumn)
 		elseif char == "+" then
-			-- check if it's a ++ or a += or just a +
+			addToken("PLUS", "+")
 			if i + 1 < len and source[i + 1] == "+" then
 				addToken("PLUSPLUS", "++")
 				i += 1
 				column += 1
 			elseif i + 1 < len and source[i + 1] == "=" then
 				addToken("PLUSEQUALS", "+=")
 				i += 1
 				column += 1
 			else
 				addToken("PLUS", "+")
 			end
 		elseif char == "-" then
-			-- check if it's a -- or a -= or just a -
+			addToken("MINUS", "-")
 			if i + 1 < len and source[i + 1] == "-" then
 				addToken("MINUSMINUS", "--")
 				i += 1
 				column += 1
 			elseif i + 1 < len and source[i + 1] == "=" then
 				addToken("MINUSEQUALS", "-=")
 				i += 1
 				column += 1
 			else
 				addToken("MINUS", "-")
 			end
 		elseif char == "*" then
 			addToken("TIMES", "*")
 		elseif char == "/" then
 			addToken("DIVIDE", "/")
 		elseif char == "%" then
 			addToken("MODULO", "%")
 		elseif char == "^" then
 			addToken("EXPONENT", "^")
 		elseif char == ":" then
 			addToken("COLON", ":")
 		elseif char == ";" then
 			addToken("SEMICOLON", ";")
 		elseif char == "(" then
 			addToken("LPAREN", "(")
 		elseif char == ")" then
 			addToken("RPAREN", ")")
 		elseif char == "{" then
 			addToken("LBRACE", "{")
 		elseif char == "}" then
 			addToken("RBRACE", "}")
 		elseif char == "[" then
 			addToken("LBRACKET", "[")
 		elseif char == "]" then
 			addToken("RBRACKET", "]")
 		elseif char ~= " " then
 			if char >= "0" and char <= "9" then
 				local startLine, startColumn = line, column
@ -776,6 +659,36 @@ local function lex(source: { string }): { Token }
 		end
 	end
 	-- postprocessing
 	-- remove leading and trailing newlines
 	while tokens[1].kind == "NEWLINE" do
 		table.remove(tokens, 1)
 	end
 	while tokens[#tokens].kind == "NEWLINE" do
 		table.remove(tokens, #tokens)
 	end
 	-- check that the number of indents and dedents is the same; if not, add the missing dedents
 	local indents, dedents = 0, 0
 	for _, token in tokens do
 		if token.kind == "INDENT" then
 			indents += 1
 		elseif token.kind == "DEDENT" then
 			dedents += 1
 		end
 	end
 	if dedents > indents then
 		-- huh?????
 		print(colour.red "too many dedents")
 		exit(1)
 	end
 	while indents > dedents do
 		addToken("DEDENT", "\t")
 		dedents += 1
 	end
 	return tokens
 end
--- a/Script/test.melt
+++ b/Script/test.melt
@ -1,2 +1,6 @@
-y = if x is 6
+y = if x is 6:
 	print "yes"
 	print "test"
 else
 	print "no"
 	print "test"