Add compiler

This commit is contained in:
Asraelite 2024-05-17 18:37:51 +02:00
parent 476972f85a
commit 3f3125ef43
31 changed files with 2625 additions and 3 deletions

9
compiler/Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
name = "compiler"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
nom = "7.1.3"

View file

@ -0,0 +1,96 @@
# target parva_0_1
li sp, stack
// Tokenizes the zero-terminated string at `input`, pushing tokens onto the stack.
// NOTE(review): this routine is only partly converted — the first part uses the
// brace-style syntax while the rest is still raw assembly; confirm it builds as-is.
fn tokenize(input) {
// Start one cell before the input because the loop increments before reading.
let current_token_ptr = input - 1;
let temp = -1;
push(temp); // mark the beginning of the token stack
loop {
current_token_ptr += 1;
let current_token = rw(current_token_ptr);
// A zero cell terminates the input.
// NOTE(review): no `tokenize_end` label is defined in the visible part of this file.
if current_token == 0 {
goto(tokenize_end);
}
// NOTE(review): these two equality tests look like leftovers of the range check
// done with `blt`/`bgt` further down; as written only the characters '0' and '9'
// themselves take the symbol path. The label defined below is also spelled
// `tokenize__symbol` (two underscores) — confirm intent.
if current_token == '0' {
goto(tokenize_symbol);
}
if current_token == '9' {
goto(tokenize_symbol);
}
// NOTE(review): the next two lines are incomplete statements (no terminator / no operation).
let number_start_addr = sp
current_token_ptr
let token_value = current_token - '0';
# Raw assembly from here on. Presumably x2 is the input pointer, x3 the current
# character and x4/x5 the digit bounds — none of these are set up in the visible
# code; TODO confirm.
lw x3, x2
blt x3, x4, tokenize__number_gather_end
bgt x3, x5, tokenize__number_gather_end
# NOTE(review): `tokenize__number_gather` is not defined in the visible part of this file.
b tokenize__number_gather
tokenize__number_gather_end:
li x0, 0 # x0 = sum
li x7, 1 # x7 = power of 10
.align 2
# Pops one stacked value per iteration, scales it by the current power of ten and
# accumulates it into x0, until sp equals x6 (presumably the stack position saved
# when the number started — confirm).
tokenize__number_sum:
pop x3
mulu x3, x3, x7
add x0, x0, x3
mului x7, x7, 10
bne x6, sp, tokenize__number_sum
# Emit the number token as two stack entries: the type tag, then the value.
li x3, 1 # token type = 1 = number
push x3
push x0
# NOTE(review): `tokenize__loop` is not defined in the visible part of this file.
b tokenize__loop
# Maps a symbol character to its token through the hashtable; a zero entry means
# the character is not a valid symbol.
tokenize__symbol:
xori x3, x3, 42 # todo, set to appropriate value for hashing symbols
lw x3, tokenize__symbol_hashtable(x3)
beqz x3, error_invalid_input
push x3
b tokenize__loop
}
tokenize__shunting_yard:
pop x3
# NOTE(review): the branch target is the literal `3` rather than a label — confirm
# whether this is a relative offset or an unfinished line.
bnez x3, 3
}
b end
tokenize__output_queue:
.repeat 0 40
tokenize__symbol_hashtable:
.data 0
.data 1
.repeat 0 30 # todo
# Error path: reached from the tokenizer when the symbol hashtable lookup yields zero.
# Loads the address of the error message into a0 and 1 into a1, calls `print`,
# then jumps to `end`. The meaning of the a1 argument is not shown here — TODO confirm.
error_invalid_input:
li a0, error_message_invalid_input
li a1, 1
call print
b end
# function
# Stub: currently returns immediately without printing anything.
print:
# todo
ret
# Program exit point; presumably `wfi` idles the machine — confirm against the target ISA.
end:
wfi
input_string:
.string_encoding terminal
.string "125+23*8"
.string "\0"
error_message_invalid_input:
.string_encoding ascii # todo, use correct encoding
.string "Invalid input\0"
stack:

View file

@ -0,0 +1,6 @@
x = (y + 3) + 5;
y += x;
z = foo;
fn test() {
poop = moop;
}

1
compiler/rustfmt.toml Normal file
View file

@ -0,0 +1 @@
hard_tabs = true

137
compiler/src/compiler.rs Normal file
View file

@ -0,0 +1,137 @@
use crate::{
parser::{Expression, Statement},
CompilationError,
};
/// Mutable state shared by the AST passes.
struct Context {
    /// Index that the next generated temporary will receive.
    temporary_counter: usize,
}

impl Context {
    /// Creates a context whose temporary numbering starts at zero.
    fn new() -> Self {
        Context { temporary_counter: 0 }
    }

    /// Returns a fresh name of the form `__temp<N>` and advances the counter.
    fn new_temporary(&mut self) -> String {
        let index = self.temporary_counter;
        self.temporary_counter = index + 1;
        format!("__temp{}", index)
    }
}
/// Runs the AST passes over `ast` and returns the `Debug` rendering of the result.
///
/// The tree is dumped to stdout before the first pass and after each pass.
///
/// # Errors
///
/// Propagates the first [`CompilationError`] reported by a pass.
pub fn compile(ast: Statement) -> Result<String, CompilationError> {
    let mut context = Context::new();
    println!("initial: {:#?}\n", ast);

    let desugared = ast_pass_0(ast)?;
    println!("pass 0: {:#?}\n", desugared);

    let flattened = ast_pass_1(&mut context, vec![desugared])?;
    println!("pass 1: {:#?}\n", flattened);

    Ok(format!("{:?}\n", flattened))
}
/// Pass 0
///
/// Rewrites compound assignments into simple assignments, e.g. `a += 1` to `a = a + 1`
fn ast_pass_0(ast: Statement) -> Result<Statement, CompilationError> {
let result = match ast {
Statement::Block(inner) => Statement::Block(
inner
.into_iter()
.map(ast_pass_0)
.collect::<Result<Vec<_>, _>>()?,
),
Statement::AddAssign(name, expr) => Statement::Assign(
name.clone(),
Expression::Add(Box::new(Expression::Identifier(name)), Box::new(expr)),
),
statement => statement,
};
Ok(result)
}
/// Pass 1
///
/// Expands nested expressions into simple expressions by introducing temporaries.
///
/// Every `Add` node is given its own temporary by `flatten_expression`, including the
/// outermost one, so `a = (x + y) + z;` becomes
/// `__temp0 = x + y; __temp1 = __temp0 + z; a = __temp1;`.
///
/// Blocks are processed recursively. Any statement that is neither a block nor a
/// plain assignment is copied to the output unchanged.
fn ast_pass_1(
    context: &mut Context,
    statements: Vec<Statement>,
) -> Result<Vec<Statement>, CompilationError> {
    let mut statements_out = Vec::with_capacity(statements.len());
    for statement in statements {
        match statement {
            Statement::Block(inner) => {
                statements_out.push(Statement::Block(ast_pass_1(context, inner)?));
            }
            Statement::Assign(name, expr) => {
                // `name` and `expr` are owned here, so they can be moved instead of cloned.
                let (prelude, value) = flatten_expression(context, expr)?;
                statements_out.extend(prelude);
                statements_out.push(Statement::Assign(name, value));
            }
            other => statements_out.push(other),
        }
    }
    Ok(statements_out)
}
/// Splits `expression` into a list of temporary assignments plus a simple expression.
///
/// For an `Add` node both operands are flattened first (left, then right), a fresh
/// temporary is assigned the sum of the flattened operands, and an identifier
/// referring to that temporary is returned. Every other expression kind is returned
/// as-is with no extra statements.
fn flatten_expression(
    context: &mut Context,
    expression: Expression,
) -> Result<(Vec<Statement>, Expression), CompilationError> {
    match expression {
        Expression::Add(lhs, rhs) => {
            let (mut emitted, lhs) = flatten_expression(context, *lhs)?;
            let (rhs_emitted, rhs) = flatten_expression(context, *rhs)?;
            emitted.extend(rhs_emitted);

            // The temporary is allocated after both operands so numbering follows evaluation order.
            let temporary = context.new_temporary();
            emitted.push(Statement::Assign(
                temporary.clone(),
                Expression::Add(Box::new(lhs), Box::new(rhs)),
            ));
            Ok((emitted, Expression::Identifier(temporary)))
        }
        leaf => Ok((Vec::new(), leaf)),
    }
}
/// Pass 2
///
/// Intended to convert the AST to IR.
///
/// NOTE(review): the body is still a placeholder that performs the same
/// expression flattening as pass 1 and returns statements, not IR. Nested blocks
/// recurse into this function rather than into `ast_pass_1`, so the recursion
/// follows this pass once real IR lowering is written.
fn ast_pass_2(
    context: &mut Context,
    statements: Vec<Statement>,
) -> Result<Vec<Statement>, CompilationError> {
    let mut statements_out = Vec::with_capacity(statements.len());
    for statement in statements {
        match statement {
            Statement::Block(inner) => {
                statements_out.push(Statement::Block(ast_pass_2(context, inner)?));
            }
            Statement::Assign(name, expr) => {
                // `name` and `expr` are owned here, so they can be moved instead of cloned.
                let (prelude, value) = flatten_expression(context, expr)?;
                statements_out.extend(prelude);
                statements_out.push(Statement::Assign(name, value));
            }
            other => statements_out.push(other),
        }
    }
    Ok(statements_out)
}

View file

@ -0,0 +1,11 @@
/// A variable referenced by a pseudo-instruction, identified by name.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Variable {
    /// Name of the variable.
    name: String,
}

/// An instruction of the intermediate representation, operating on named variables.
///
/// NOTE(review): operand order is not established by this file; presumably the
/// destination comes first — confirm once a consumer exists.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PseudoInstruction {
    /// Call with a single variable operand.
    Call(Variable),
    /// Return, with no operands.
    Return,
    /// Addition over three variables.
    Add(Variable, Variable, Variable),
    /// Subtraction over three variables.
    Sub(Variable, Variable, Variable),
    /// Load-immediate pairing a variable with a 32-bit constant.
    Li(Variable, i32),
}

186
compiler/src/lexer.rs Normal file
View file

@ -0,0 +1,186 @@
use crate::CompilationError;
/// A lexical token produced by the lexer.
#[derive(Debug, Clone)]
pub enum Token {
/// A name starting with an ASCII letter or `_` that is not a keyword.
Identifier(String),
/// A character literal written between single quotes.
Character(char),
/// A decimal integer literal.
Number(i32),
/// An arithmetic operator.
Operator(Operator),
/// `:`
Colon,
/// `@`
AtSign,
/// `(`
OpenParenthesis,
/// `)`
CloseParenthesis,
/// `{`
OpenBrace,
/// `}`
CloseBrace,
/// An assignment operator.
Assignment(Assignment),
/// A comparison operator.
Comparison(Comparison),
/// `;`
Semicolon,
/// A reserved word.
Keyword(Keyword),
}
/// Arithmetic operators recognised by the lexer.
#[derive(Debug, Copy, Clone)]
pub enum Operator {
/// `+`
Plus,
/// `-`
Minus,
/// `*`
Star,
/// `/`
Slash,
}
/// Reserved words recognised by the lexer.
#[derive(Debug, Copy, Clone)]
pub enum Keyword {
/// `let`
Let,
/// `return`
Return,
}
/// Assignment operators recognised by the lexer.
#[derive(Debug, Copy, Clone)]
pub enum Assignment {
/// `=`
Assign,
/// `+=`
AddAssign,
}
/// Comparison operators recognised by the lexer.
#[derive(Debug, Copy, Clone)]
pub enum Comparison {
/// `==`
Equals,
/// `>`
GreaterThan,
/// `<`
LessThan,
}
/// Options for the lexer. Currently has no fields.
#[derive(Debug, Clone)]
pub struct LexerConfiguration {}
/// Cursor over the source text that hands out one token at a time.
struct Lexer {
// Options supplied by the caller.
configuration: LexerConfiguration,
// Source text split into characters so it can be indexed and sliced.
input: Vec<char>,
// Index into `input` of the next character to read.
position: usize,
// Zero-based line number; incremented each time a newline is consumed.
line: usize,
// Value of `position` just after the most recently consumed newline; used to compute columns.
line_start_position: usize,
}
impl Lexer {
    /// Creates a lexer positioned at the first character of `input`, on line 0.
    fn new(configuration: LexerConfiguration, input: &str) -> Self {
        Self {
            // Keep the caller's configuration instead of silently replacing it.
            configuration,
            input: input.chars().collect(),
            position: 0,
            line: 0,
            line_start_position: 0,
        }
    }

    /// Builds an error located at `position` on the current line.
    fn error_at(&self, position: usize, message: String) -> CompilationError {
        CompilationError {
            message,
            line: self.line,
            column: position - self.line_start_position,
        }
    }

    /// Scans and returns the next token, or `Ok(None)` at end of input.
    ///
    /// Whitespace is skipped, and newlines update the line bookkeeping used for
    /// error positions. Characters that do not start any known token are skipped
    /// silently.
    ///
    /// # Errors
    ///
    /// Returns a [`CompilationError`] for an unterminated or empty character
    /// literal and for a number literal that does not fit in an `i32`.
    fn next_token(&mut self) -> Result<Option<Token>, CompilationError> {
        while self.position < self.input.len() {
            let next = &self.input[self.position..];
            // Every arm starts with the first character already consumed.
            self.position += 1;
            type Tk = Token;
            let token = match next {
                ['=', '=', ..] => {
                    // Consume the second '=' too, otherwise it is lexed again as `=`.
                    self.position += 1;
                    Tk::Comparison(Comparison::Equals)
                }
                ['>', ..] => Tk::Comparison(Comparison::GreaterThan),
                ['<', ..] => Tk::Comparison(Comparison::LessThan),
                ['=', ..] => Tk::Assignment(Assignment::Assign),
                ['+', '=', ..] => {
                    // Consume the '=' of `+=` as part of this token.
                    self.position += 1;
                    Tk::Assignment(Assignment::AddAssign)
                }
                ['+', ..] => Tk::Operator(Operator::Plus),
                ['-', ..] => Tk::Operator(Operator::Minus),
                ['*', ..] => Tk::Operator(Operator::Star),
                ['/', ..] => Tk::Operator(Operator::Slash),
                ['(', ..] => Tk::OpenParenthesis,
                [')', ..] => Tk::CloseParenthesis,
                ['a'..='z' | 'A'..='Z' | '_', ..] => {
                    let start = self.position - 1;
                    while self.position < self.input.len()
                        && (self.input[self.position].is_alphanumeric()
                            || self.input[self.position] == '_')
                    {
                        self.position += 1;
                    }
                    let identifier: String = self.input[start..self.position].iter().collect();
                    match identifier.as_str() {
                        "let" => Tk::Keyword(Keyword::Let),
                        "return" => Tk::Keyword(Keyword::Return),
                        _ => Tk::Identifier(identifier),
                    }
                }
                ['{', ..] => Tk::OpenBrace,
                ['}', ..] => Tk::CloseBrace,
                [';', ..] => Tk::Semicolon,
                [':', ..] => Tk::Colon,
                ['@', ..] => Tk::AtSign,
                ['\'', ..] => {
                    // `start` is the first character after the opening quote.
                    let start = self.position;
                    while self.position < self.input.len() && self.input[self.position] != '\'' {
                        self.position += 1;
                    }
                    if self.position >= self.input.len() {
                        return Err(
                            self.error_at(start, "Expected closing single quote".to_string())
                        );
                    }
                    let end = self.position;
                    // Step over the closing quote.
                    self.position += 1;
                    // As before, only the first character of the literal is used;
                    // an empty literal is now an error instead of a panic.
                    match self.input[start..end].first() {
                        Some(&character) => Tk::Character(character),
                        None => {
                            return Err(
                                self.error_at(start, "Empty character literal".to_string())
                            );
                        }
                    }
                }
                ['0'..='9', ..] => {
                    let start = self.position - 1;
                    while self.position < self.input.len()
                        && self.input[self.position].is_digit(10)
                    {
                        self.position += 1;
                    }
                    let text: String = self.input[start..self.position].iter().collect();
                    match text.parse::<i32>() {
                        Ok(number) => Tk::Number(number),
                        Err(err) => {
                            return Err(self.error_at(
                                start,
                                format!("Invalid number literal '{}': {}", text, err),
                            ));
                        }
                    }
                }
                ['\n', ..] => {
                    self.line += 1;
                    self.line_start_position = self.position;
                    continue;
                }
                [' ' | '\t' | '\r', ..] => continue,
                // Characters that start no known token are skipped, as before.
                _ => continue,
            };
            return Ok(Some(token));
        }
        Ok(None)
    }
}
/// Splits `input` into the complete list of tokens.
///
/// # Errors
///
/// On the first lexer error, returns a message of the form
/// `Parsing failed at line <line>:<column>: <message>` with one-based positions.
pub fn lex(input: &str, configuration: LexerConfiguration) -> Result<Vec<Token>, String> {
    let mut lexer = Lexer::new(configuration, input);
    let mut tokens = Vec::new();
    loop {
        match lexer.next_token() {
            Ok(Some(token)) => tokens.push(token),
            Ok(None) => return Ok(tokens),
            Err(error) => {
                // The lexer counts from zero; report one-based positions.
                return Err(format!(
                    "Parsing failed at line {}:{}: {}",
                    error.line + 1,
                    error.column + 1,
                    error.message
                ));
            }
        }
    }
}

71
compiler/src/main.rs Normal file
View file

@ -0,0 +1,71 @@
#![allow(unused)]
/*
can only reorder instructions within a block
branch points delimit blocks
*/
use std::env;
use std::fs;
use std::io::{self, Read};
mod compiler;
mod parser;
/// An error raised while lexing, parsing or compiling, with its source position.
#[derive(Debug)]
pub struct CompilationError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Line the error refers to.
    pub line: usize,
    /// Column the error refers to.
    pub column: usize,
}

impl CompilationError {
    /// Creates an error from its message and position.
    pub fn new(message: String, line: usize, column: usize) -> Self {
        CompilationError {
            message: message,
            line: line,
            column: column,
        }
    }
}
fn main() {
let args: Vec<String> = env::args().collect();
let input = if args.len() > 1 {
if ((args[1] == "-s") || (args[1] == "--source") && args.len() > 2) {
args[2].to_owned()
} else {
let filename = &args[1];
if filename == "--" {
let mut buffer = String::new();
io::stdin()
.read_to_string(&mut buffer)
.expect("Failed to read from stdin");
buffer
} else {
fs::read_to_string(filename).expect("Failed to read file")
}
}
} else {
panic!("Expected a filename or '--' as argument");
};
let parse_result = match parser::parse(&input) {
Ok(expr) => expr,
Err(err) => {
eprintln!("Error: {:?}", err);
std::process::exit(1);
}
};
let compile_result = match compiler::compile(parse_result) {
Ok(expr) => expr,
Err(err) => {
eprintln!("Error: {:?}", err);
std::process::exit(1);
}
};
println!("Compiled: {:?}", compile_result);
}

378
compiler/src/parser.rs Normal file
View file

@ -0,0 +1,378 @@
use crate::CompilationError;
mod asm;
use nom::{
branch::alt,
bytes::complete::{tag, take_while},
character::complete::{alpha1, alphanumeric1, char, one_of},
combinator::{all_consuming, complete, map, map_res, opt, recognize},
error::{dbg_dmp, ParseError},
multi::{many0, many1, separated_list0},
sequence::{delimited, preceded, terminated, tuple},
Finish, IResult,
};
/// A statement of the source language.
#[derive(Debug, Clone)]
pub enum Statement {
/// A sequence of statements; also used as the root node of a parsed program.
Block(Vec<Statement>),
/// A named label. Not produced by the parser functions visible in this file.
Label(String),
/// `name = expression;`
Assign(String, Expression),
/// `name += expression;`
AddAssign(String, Expression),
/// `name -= expression;`
SubAssign(String, Expression),
/// `name *= expression;`
MulAssign(String, Expression),
/// `name /= expression;`
DivAssign(String, Expression),
/// `fn name(parameters) body` — name, parameter names and body.
FunctionDeclaration(String, Vec<String>, Box<Statement>),
/// A named subroutine with a body. Not produced by the parser functions visible in this file.
SubroutineDeclaration(String, Box<Statement>),
}
/// Kinds of binary operation.
///
/// NOTE(review): not referenced by the parser functions visible in this file, which
/// build dedicated `Expression` variants instead — confirm whether this is still needed.
#[derive(Debug, Copy, Clone)]
pub enum BinOp {
Add,
Sub,
Mul,
Div,
Sll,
Srl,
Sra,
BitAnd,
BitOr,
BitXor,
Index,
}
/// An expression of the source language.
#[derive(Debug, Clone)]
pub enum Expression {
/// Integer literal (decimal, `0x`, `0o` or `0b`).
Number(i64),
/// Reference to a named variable.
Identifier(String),
/// `name(arguments)`
FunctionCall(String, Vec<Expression>),
/// `left + right`
Add(Box<Expression>, Box<Expression>),
/// `left - right`
Sub(Box<Expression>, Box<Expression>),
/// `left * right`
Mul(Box<Expression>, Box<Expression>),
/// `left / right`
Div(Box<Expression>, Box<Expression>),
/// `left << right`
Sll(Box<Expression>, Box<Expression>),
/// `left >> right`
Srl(Box<Expression>, Box<Expression>),
/// Not produced by the parser functions visible in this file (`>>` yields `Srl`).
Sra(Box<Expression>, Box<Expression>),
/// NOTE(review): declared with two operands although the name suggests a unary
/// operation; not produced by the parser functions visible in this file — confirm.
BitNeg(Box<Expression>, Box<Expression>),
/// `left & right`
BitAnd(Box<Expression>, Box<Expression>),
/// `left | right`
BitOr(Box<Expression>, Box<Expression>),
/// `left ^ right`
BitXor(Box<Expression>, Box<Expression>),
/// `*expression`
Deref(Box<Expression>),
/// Not produced by the parser functions visible in this file.
Index(Box<Expression>, Box<Expression>),
}
/// Parses a whole program into a [`Statement::Block`].
///
/// The entire input must be consumed for the parse to succeed.
///
/// # Errors
///
/// Returns a [`CompilationError`] carrying the debug rendering of the parser error.
/// The reported position is always line 0, column 0 for now.
pub fn parse(input: &str) -> Result<Statement, CompilationError> {
    all_consuming(complete(program))(input)
        .finish()
        .map(|(_, ast)| ast)
        .map_err(|err| {
            // TODO: derive the real position, e.g. via `get_line_and_column(input)`.
            let (line, column) = (0, 0);
            CompilationError::new(format!("Failed to parse input: {:?}", err), line, column)
        })
}
fn expression(input: &str) -> IResult<&str, Expression> {
alt((
map_res(
tuple((
primitive_expression,
whitespace,
tag("+"),
whitespace,
expression,
)),
|(left, _, _, _, right)| {
Ok::<_, nom::error::Error<String>>(Expression::Add(Box::new(left), Box::new(right)))
},
),
map_res(
tuple((
primitive_expression,
whitespace,
tag("-"),
whitespace,
expression,
)),
|(left, _, _, _, right)| {
Ok::<_, nom::error::Error<String>>(Expression::Sub(Box::new(left), Box::new(right)))
},
),
map_res(
tuple((
primitive_expression,
whitespace,
tag("*"),
whitespace,
expression,
)),
|(left, _, _, _, right)| {
Ok::<_, nom::error::Error<String>>(Expression::Mul(Box::new(left), Box::new(right)))
},
),
map_res(
tuple((
primitive_expression,
whitespace,
tag("/"),
whitespace,
expression,
)),
|(left, _, _, _, right)| {
Ok::<_, nom::error::Error<String>>(Expression::Div(Box::new(left), Box::new(right)))
},
),
map_res(
tuple((
primitive_expression,
whitespace,
tag("<<"),
whitespace,
expression,
)),
|(left, _, _, _, right)| {
Ok::<_, nom::error::Error<String>>(Expression::Sll(Box::new(left), Box::new(right)))
},
),
map_res(
tuple((
primitive_expression,
whitespace,
tag(">>"),
whitespace,
expression,
)),
|(left, _, _, _, right)| {
Ok::<_, nom::error::Error<String>>(Expression::Srl(Box::new(left), Box::new(right)))
},
),
map_res(
tuple((
primitive_expression,
whitespace,
tag("&"),
whitespace,
expression,
)),
|(left, _, _, _, right)| {
Ok::<_, nom::error::Error<String>>(Expression::BitAnd(
Box::new(left),
Box::new(right),
))
},
),
map_res(
tuple((
primitive_expression,
whitespace,
tag("|"),
whitespace,
expression,
)),
|(left, _, _, _, right)| {
Ok::<_, nom::error::Error<String>>(Expression::BitOr(
Box::new(left),
Box::new(right),
))
},
),
map_res(
tuple((
primitive_expression,
whitespace,
tag("^"),
whitespace,
expression,
)),
|(left, _, _, _, right)| {
Ok::<_, nom::error::Error<String>>(Expression::BitXor(
Box::new(left),
Box::new(right),
))
},
),
map_res(
tuple((
identifier,
whitespace,
delimited(
tag("("),
separated_list0(tag(","), delimited(whitespace, expression, whitespace)),
tag(")"),
),
)),
|(name, _, arguments)| {
Ok::<_, nom::error::Error<String>>(Expression::FunctionCall(
name.to_string(),
arguments,
))
},
),
map_res(
tuple((
tag("*"),
whitespace,
expression,
)),
|(_, _, value)| {
Ok::<_, nom::error::Error<String>>(Expression::Deref(Box::new(value)))
},
),
primitive_expression,
))(input)
}
fn primitive_expression(input: &str) -> IResult<&str, Expression> {
alt((
variable,
number,
map_res(
tuple((tag("("), whitespace, expression, whitespace, tag(")"))),
|(_, _, expr, _, _)| Ok::<_, nom::error::Error<String>>(expr),
),
))(input)
}
fn statement(input: &str) -> IResult<&str, Statement> {
delimited(whitespace, alt((block, assignment, function)), whitespace)(input)
}
fn block(input: &str) -> IResult<&str, Statement> {
let (input, (_, _, statements, _, _)) =
tuple((tag("{"), whitespace, many0(statement), whitespace, tag("}")))(input)?;
Ok((input, Statement::Block(statements)))
}
fn program(input: &str) -> IResult<&str, Statement> {
let (input, (statements)) = many0(statement)(input)?;
Ok((input, Statement::Block(statements)))
}
/// Parses `name = expr;` and the compound forms `+=`, `-=`, `*=` and `/=`.
///
/// Each compound operator maps to its own statement variant; `*=` and `/=`
/// previously both produced `SubAssign`.
fn assignment(input: &str) -> IResult<&str, Statement> {
    let (input, (name, _, operator, _, expr, _)) = tuple((
        identifier,
        whitespace,
        opt(one_of("+-/*")),
        tag("="),
        delimited(whitespace, expression, whitespace),
        tag(";"),
    ))(input)?;
    let name = name.to_string();
    let statement = match operator {
        Some('+') => Statement::AddAssign(name, expr),
        Some('-') => Statement::SubAssign(name, expr),
        Some('*') => Statement::MulAssign(name, expr),
        Some('/') => Statement::DivAssign(name, expr),
        None => Statement::Assign(name, expr),
        Some(_) => unreachable!("one_of only yields '+', '-', '/' or '*'"),
    };
    Ok((input, statement))
}
/// Parses `fn name(param, param, ...) { body }` into a
/// [`Statement::FunctionDeclaration`].
///
/// Whitespace is accepted around each parameter and inside an empty parameter
/// list, matching how call arguments are parsed, so `fn f(a, b)` and `fn f( )`
/// are both valid.
fn function(input: &str) -> IResult<&str, Statement> {
    let (input, (_, name, params, _, body)) = tuple((
        tag("fn"),
        delimited(whitespace, identifier, whitespace),
        delimited(
            // Consuming whitespace after `(` lets an empty list contain spaces.
            tuple((tag("("), whitespace)),
            separated_list0(tag(","), delimited(whitespace, identifier, whitespace)),
            tag(")"),
        ),
        whitespace,
        block,
    ))(input)?;
    Ok((
        input,
        Statement::FunctionDeclaration(
            name.to_string(),
            params.into_iter().map(String::from).collect(),
            Box::new(body),
        ),
    ))
}
fn variable(input: &str) -> IResult<&str, Expression> {
map(identifier, |name| Expression::Identifier(name.to_string()))(input)
}
/// Parses an identifier: a letter or `_`, followed by any mix of letters, digits and `_`.
///
/// Underscores are accepted after the first character as well, so names such as
/// `foo_bar` and `__temp0` are recognised in full rather than cut at the first
/// inner underscore.
fn identifier(input: &str) -> IResult<&str, &str> {
    recognize(tuple((
        alt((tag("_"), alpha1)),
        many0(alt((tag("_"), alphanumeric1))),
    )))(input)
}
/// Parses an integer literal in any supported base into [`Expression::Number`].
///
/// Prefixed forms are tried before plain decimal so that the leading `0` of
/// `0x`, `0o` and `0b` is not consumed as a decimal number.
fn number(input: &str) -> IResult<&str, Expression> {
    map(
        alt((
            hexadecimal_number,
            octal_number,
            binary_number,
            decimal_number,
        )),
        Expression::Number,
    )(input)
}
/// Parses a `0x`/`0X` hexadecimal literal; `_` may follow any digit as a separator.
fn hexadecimal_number(input: &str) -> IResult<&str, i64> {
    let prefix = alt((tag("0x"), tag("0X")));
    let digits = recognize(many1(terminated(
        one_of("0123456789abcdefABCDEF"),
        many0(char('_')),
    )));
    map_res(preceded(prefix, digits), |text: &str| {
        i64::from_str_radix(&text.replace("_", ""), 16)
    })(input)
}
/// Parses a `0o`/`0O` octal literal; `_` may follow any digit as a separator.
fn octal_number(input: &str) -> IResult<&str, i64> {
    let prefix = alt((tag("0o"), tag("0O")));
    let digits = recognize(many1(terminated(one_of("01234567"), many0(char('_')))));
    map_res(preceded(prefix, digits), |text: &str| {
        i64::from_str_radix(&text.replace("_", ""), 8)
    })(input)
}
/// Parses a `0b`/`0B` binary literal; `_` may follow any digit as a separator.
fn binary_number(input: &str) -> IResult<&str, i64> {
    let prefix = alt((tag("0b"), tag("0B")));
    let digits = recognize(many1(terminated(one_of("01"), many0(char('_')))));
    map_res(preceded(prefix, digits), |text: &str| {
        i64::from_str_radix(&text.replace("_", ""), 2)
    })(input)
}
/// Parses an unprefixed decimal literal; `_` may follow any digit as a separator.
fn decimal_number(input: &str) -> IResult<&str, i64> {
    let digits = recognize(many1(terminated(one_of("0123456789"), many0(char('_')))));
    map_res(digits, |text: &str| {
        i64::from_str_radix(&text.replace("_", ""), 10)
    })(input)
}
/// Consumes zero or more whitespace characters and returns the consumed slice.
///
/// Carriage returns are treated as whitespace so that sources with CRLF line
/// endings parse the same as LF-only sources.
fn whitespace(i: &str) -> IResult<&str, &str> {
    recognize(many0(one_of(" \n\r\t")))(i)
}
// fn expect<'a, F, E, T>(parser: F, error_msg: E) -> impl Fn(&'a str) -> IResult<Option<T>, T>
// where
// F: Fn(&'a str) -> IResult<T, T>,
// E: ToString,
// {
// move |input| match parser(input) {
// Ok((remaining, out)) => Ok((remaining, Some(out))),
// Err(nom::Err::Error((input, _))) | Err(nom::Err::Failure((input, _))) => {
// let err = Error(input.to_range(), error_msg.to_string());
// input.extra.report_error(err); // Push error onto stack.
// Ok((input, None)) // Parsing failed, but keep going.
// }
// Err(err) => Err(err),
// }
// }

View file

@ -0,0 +1,20 @@
use nom::{
branch::alt,
bytes::complete::tag,
character::complete::{alphanumeric1, char, one_of},
combinator::{map, map_res, recognize},
multi::{many0, many1, separated_list0},
sequence::{preceded, terminated, tuple},
IResult,
};
/// An assembly instruction.
///
/// NOTE(review): operand order is not established by this file; presumably the
/// destination comes first — confirm against the target ISA.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Instruction {
    /// Addition over three operands.
    Add(Operand, Operand, Operand),
    /// Subtraction over three operands.
    Sub(Operand, Operand, Operand),
    /// Branch-if-zero over two operands (presumably the tested value and the target — confirm).
    Beqz(Operand, Operand),
}

/// An operand of an [`Instruction`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Operand {
    /// A numeric operand (presumably a register index — confirm).
    Direct(u8),
    /// A symbolic name.
    Identifier(String),
}