From 34d01f03ec4aea48067b00a587b2b7e4df0e3408 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Wed, 1 Oct 2025 20:10:35 +1000 Subject: [PATCH 01/56] WIP: Refactor tokeniser and add broken test cases, refactor parser slightly --- compiler/src/parser.rs | 62 ++++++------- compiler/src/tokeniser.rs | 190 +++++++++++++++++++++----------------- 2 files changed, 131 insertions(+), 121 deletions(-) diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index 4b8b809..7fb4156 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -995,22 +995,19 @@ impl Expression { } current_sign = None; } - (Some(sign), Token::True | Token::False) => { - let parsed_int = match &tokens[i] { + (Some(sign), token @ (Token::True | Token::False)) => { + let parsed_int = match token { Token::True => 1, - Token::False => 0, - _ => r_panic!( - "Unreachable error occured while parsing boolean value: {tokens:#?}" - ), + Token::False | _ => 0, }; i += 1; - match sign { - Sign::Positive => summands.push(Expression::NaturalNumber(parsed_int)), - Sign::Negative => summands.push(Expression::SumExpression { + summands.push(match sign { + Sign::Positive => Expression::NaturalNumber(parsed_int), + Sign::Negative => Expression::SumExpression { sign: Sign::Negative, summands: vec![Expression::NaturalNumber(parsed_int)], - }), - } + }, + }); current_sign = None; } (Some(sign), Token::Character(chr)) => { @@ -1022,25 +1019,25 @@ impl Expression { ); i += 1; - match sign { - Sign::Positive => summands.push(Expression::NaturalNumber(chr_int)), - Sign::Negative => summands.push(Expression::SumExpression { + summands.push(match sign { + Sign::Positive => Expression::NaturalNumber(chr_int), + Sign::Negative => Expression::SumExpression { sign: Sign::Negative, summands: vec![Expression::NaturalNumber(chr_int)], - }), - } + }, + }); current_sign = None; } (Some(sign), Token::Name(_) | Token::Asterisk) => { let (var, len) = parse_var_target(&tokens[i..])?; i += len; - match sign { - 
Sign::Positive => summands.push(Expression::VariableReference(var)), - Sign::Negative => summands.push(Expression::SumExpression { + summands.push(match sign { + Sign::Positive => Expression::VariableReference(var), + Sign::Negative => Expression::SumExpression { sign: Sign::Negative, summands: vec![Expression::VariableReference(var)], - }), - } + }, + }); current_sign = None; } (Some(sign), Token::OpenParenthesis) => { @@ -1081,23 +1078,20 @@ impl Expression { }); current_sign = None; } - _ => { - r_panic!( - "Unexpected token {:#?} found in expression: {tokens:#?}", - tokens[i] - ); + token => { + r_panic!("Unexpected token {token:#?} found in expression: {tokens:#?}"); } } } - match summands.len() { - 1 => Ok(summands.into_iter().next().unwrap()), - 1.. => Ok(Expression::SumExpression { + Ok(match summands.len() { + 1 => summands.into_iter().next().unwrap(), + 1.. => Expression::SumExpression { sign: Sign::Positive, summands, - }), + }, _ => r_panic!("Expected value in expression: {tokens:#?}"), - } + }) } /// flip the sign of an expression, equivalent to `x => -(x)` @@ -1283,7 +1277,6 @@ pub enum Clause { InlineBrainfuck { location_specifier: LocationSpecifier, clobbered_variables: Vec, - // TODO: make this support embedded mastermind operations: Vec, }, } @@ -1321,10 +1314,7 @@ pub enum LocationSpecifier { } impl LocationSpecifier { fn is_none(&self) -> bool { - match self { - LocationSpecifier::None => true, - _ => false, - } + matches!(self, LocationSpecifier::None) } } diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index 9bc72ce..a6338ec 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -1,8 +1,6 @@ -// TODO: refactor this tokeniser, needs some fixes and could be made simpler/cleaner - use regex_lite::Regex; -use crate::macros::macros::r_assert; +use crate::macros::macros::{r_assert, r_panic}; pub fn tokenise(source: &String) -> Result, String> { let stripped = source @@ -16,16 +14,12 @@ pub fn tokenise(source: 
&String) -> Result, String> { (";", Token::Semicolon), ("output", Token::Output), ("input", Token::Input), - // ("#debug", Token::Debug), - // ("let", Token::Let), ("cell", Token::Cell), ("struct", Token::Struct), ("=", Token::EqualsSign), ("while", Token::While), ("drain", Token::Drain), ("into", Token::Into), - // ("clear", Token::Clear), - // ("loop", Token::Loop), ("else", Token::Else), ("copy", Token::Copy), ("bf", Token::Bf), @@ -33,17 +27,7 @@ pub fn tokenise(source: &String) -> Result, String> { ("assert", Token::Assert), ("equals", Token::Equals), ("unknown", Token::Unknown), - // ("call", Token::Call), - // ("bool", Token::Bool), - // ("free", Token::Free), - // ("push", Token::Push), - // ("deal", Token::Deal), - // ("def", Token::Def), ("fn", Token::Fn), - // ("int", Token::Int), - // ("add", Token::Add), - // ("sub", Token::Sub), - // ("pop", Token::Pop), ("if", Token::If), ("not", Token::Not), ("else", Token::Else), @@ -79,76 +63,59 @@ pub fn tokenise(source: &String) -> Result, String> { while chr_idx < stripped.len() { let remaining = &stripped[chr_idx..]; - let mut found = false; - - ///////// - if let Some(num_capture) = num_re.captures(remaining) { - found = true; - let substring = String::from(&num_capture[0]); + if let Some(substring) = num_re + .captures(remaining) + .map(|num_capture| String::from(&num_capture[0])) + { chr_idx += substring.len(); tokens.push(Token::Digits(substring)); - } else if let Some(name_capture) = name_re.captures(remaining) { - found = true; - let substring = String::from(&name_capture[0]); - if mappings - .iter() - // this could be made more efficient if we had a table of keywords vs symbols - .find(|(keyword, _)| substring == *keyword) - .is_some() - { - found = false; - } else { - chr_idx += substring.len(); - tokens.push(Token::Name(substring)); - } - } else if let Some(str_capture) = str_re.captures(remaining) { - found = true; - let substring = String::from(&str_capture[0]); - // not the most efficient way, this 
simply removes the quote characters - // could refactor this + } else if let Some(substring) = name_re + .captures(remaining) + .map(|name_capture| String::from(&name_capture[0])) + .take_if(|substring| { + mappings + .iter() + .find(|(keyword, _)| substring == *keyword) + .is_none() + }) { + chr_idx += substring.len(); + tokens.push(Token::Name(substring)); + } else if let Some(substring) = str_re + .captures(remaining) + .map(|str_capture| String::from(&str_capture[0])) + { chr_idx += substring.len(); - let unescaped: String = serde_json::from_str(&substring) - .or(Err("Could not unescape string literal in tokenisation due to serde error, this should never occur."))?; + let unescaped = serde_json::from_str(&substring) + .or(Err("Could not unescape string literal in tokenisation \ +due to serde error, this should never occur."))?; tokens.push(Token::String(unescaped)); - } else if let Some(chr_capture) = chr_re.captures(remaining) { - found = true; - let chr_literal = String::from(&chr_capture[0]); - // see above - chr_idx += chr_literal.len(); - // this code sucks, TODO: refactor - // make a new double-quoted string because serde json doesn't like single quotes and I can't be bothered making my own unescaping function - let escaped_string = - String::new() + "\"" + &chr_literal[1..(chr_literal.len() - 1)] + "\""; + } else if let Some(substring) = chr_re + .captures(remaining) + .map(|chr_capture| String::from(&chr_capture[0])) + { + chr_idx += substring.len(); + // hack: replace single quotes with double quotes, then use serde to unescape all the characters + let escaped_string = String::new() + "\"" + &substring[1..(substring.len() - 1)] + "\""; let unescaped: String = serde_json::from_str(&escaped_string) - .or(Err("Could not unescape character literal in tokenisation due to serde error, this should never occur."))?; - // might need to change this for escaped characters (TODO) + .or(Err("Could not unescape character literal in tokenisation \ +due to serde 
error, this should never occur."))?; + r_assert!(unescaped.len() == 1, "Character literals must be length 1"); tokens.push(Token::Character(unescaped.chars().next().unwrap())); + } else if let Some((text, token)) = mappings + .iter() + .find(|(text, _)| remaining.starts_with(text)) + { + tokens.push(token.clone()); + chr_idx += (*text).len(); + } else { + r_panic!("Unknown token found while tokenising program: \"{remaining}\""); } - ///////// - - if !found { - for (text, token) in mappings.iter() { - if remaining.starts_with(*text) { - tokens.push(token.clone()); - chr_idx += (*text).len(); - found = true; - break; - } - } - } - r_assert!( - found, - "Unknown token found while tokenising program: \"{remaining}\"" - ); } Ok(tokens .into_iter() - .filter(|t| match t { - Token::None => false, - _ => true, - }) + .filter(|t| !matches!(t, Token::None)) // stick a None token on the end to fix some weird parsing errors (seems silly but why not?) .chain([Token::None]) .collect()) @@ -175,19 +142,13 @@ pub enum Token { None, Output, Input, - // Def, Fn, - // Let, Cell, Struct, - // Assert, - // Free, While, If, Not, Else, - // Loop, - // Break, OpenBrace, ClosingBrace, OpenSquareBracket, @@ -208,10 +169,6 @@ pub enum Token { Assert, Equals, Unknown, - // Push, - // Pop, - // Deal, - // Debug, Name(String), Digits(String), String(String), @@ -224,3 +181,66 @@ pub enum Token { Semicolon, UpToken, } + +mod tokeniser_tests { + use crate::tokeniser::{tokenise, Token}; + + fn _character_literal_test(input_str: &str, desired_output: &[Token]) { + let input_string = String::from(input_str); + let actual_output = tokenise(&input_string).unwrap(); + println!("desired: {desired_output:#?}"); + println!("actual: {actual_output:#?}"); + assert!(actual_output.iter().eq(desired_output)); + } + + #[test] + fn character_literals_1() { + _character_literal_test( + r#"'a' 'b' 'c' ' '"#, + &[ + Token::Character('a'), + Token::Character('b'), + Token::Character('c'), + Token::Character(' '), + 
// TODO: remove this None, fix the code that needs it + Token::None, + ], + ); + } + + #[test] + fn character_literals_2() { + _character_literal_test( + r#"'\n'"#, + &[ + Token::Character('\n'), + // TODO: remove this None, fix the code that needs it + Token::None, + ], + ); + } + + #[test] + fn character_literals_3() { + _character_literal_test( + r#"'"'"#, + &[ + Token::Character('"'), + // TODO: remove this None, fix the code that needs it + Token::None, + ], + ); + } + + #[test] + fn character_literals_4() { + _character_literal_test( + r#"'\''"#, + &[ + Token::Character('\''), + // TODO: remove this None, fix the code that needs it + Token::None, + ], + ); + } +} From 2d04f7c38411519f180f799822d3390351cb5062 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Wed, 8 Oct 2025 17:44:57 +1100 Subject: [PATCH 02/56] Remove comments and fix broken test config --- compiler/src/brainfuck.rs | 2 +- compiler/src/brainfuck_optimiser.rs | 2 +- compiler/src/builder.rs | 2 +- compiler/src/constants_optimiser.rs | 37 +++++++++++++++++----------- compiler/src/lib.rs | 2 ++ compiler/src/mastermind_optimiser.rs | 6 ----- compiler/src/parser.rs | 2 -- compiler/src/tests.rs | 4 +-- compiler/src/tokeniser.rs | 1 + 9 files changed, 31 insertions(+), 27 deletions(-) delete mode 100644 compiler/src/mastermind_optimiser.rs diff --git a/compiler/src/brainfuck.rs b/compiler/src/brainfuck.rs index eae1b5b..6f41d39 100644 --- a/compiler/src/brainfuck.rs +++ b/compiler/src/brainfuck.rs @@ -381,7 +381,7 @@ impl BVM { } #[cfg(test)] -pub mod tests { +pub mod bvm_tests { // TODO: add unit tests for Tape use super::*; diff --git a/compiler/src/brainfuck_optimiser.rs b/compiler/src/brainfuck_optimiser.rs index 0072896..97e6af5 100644 --- a/compiler/src/brainfuck_optimiser.rs +++ b/compiler/src/brainfuck_optimiser.rs @@ -205,7 +205,7 @@ fn optimise_subset(run: Vec, exhaustive: bool) -> Vec { } #[cfg(test)] -mod tests { +mod bf_optimiser_tests { use crate::builder::BrainfuckOpcodes; use 
super::*; diff --git a/compiler/src/builder.rs b/compiler/src/builder.rs index bf35cc4..8e7318c 100644 --- a/compiler/src/builder.rs +++ b/compiler/src/builder.rs @@ -214,7 +214,7 @@ outside of loop it was allocated" // not sure if these optimisations should be in the builder step or in the compiler if self.config.optimise_constants { // ops.move_to_cell(&mut head_pos, cell); - // TODO: algorithm that finds the best combo of products and constants to make the number to minimise bf code + // here we use an algorithm that finds the best combo of products and constants to make the number to minimise bf code // first we get the closest allocated cell so we can calculate the distance cost of multiplying // TODO: instead find the nearest zero cell, doesn't matter if allocated or not let temp_cell = allocator.allocate_temp_cell(cell); diff --git a/compiler/src/constants_optimiser.rs b/compiler/src/constants_optimiser.rs index 3ecf5ea..63097f8 100644 --- a/compiler/src/constants_optimiser.rs +++ b/compiler/src/constants_optimiser.rs @@ -19,31 +19,39 @@ pub fn calculate_optimal_addition( // STAGE 0: // for efficiency's sake, calculate the cost of just adding the constant to the cell - let solution_0 = { + let naive_solution = { let mut ops = BrainfuckCodeBuilder::new(); ops.head_pos = start_cell; ops.move_to_cell(target_cell); ops.add_to_current_cell(value); ops }; - // https://esolangs.org/wiki/Brainfuck_constants + + // below 15 is pointless according to: https://esolangs.org/wiki/Brainfuck_constants if abs_value < 15 { - return solution_0; + return naive_solution; } // STAGE 1: // find best solution of form a * b + c let solution_1 = { - let mut previous_best: Vec<(usize, usize, usize)> = vec![(0, 0, 0)]; + // dynamic programming algorithm, although not generalised + // initialise so element 0 is also valid + let mut best_combinations: Vec<(usize, usize, usize)> = vec![(0, 0, 0)]; + // Loop until the target number, + // inner loop finds any (a, b)s where a * b = the 
iteration number i. + // Second inner loop finds c terms so that for each main iteration: + // there is some (a, b, c) where a * b + c = i. + // This finds the "cheapest" meaning the (a, b, c) where a + b + c is lowest. for i in 1..=(abs_value as usize) { - let mut cheapest: (usize, usize, usize) = (1, i, 0); + let mut current_best: (usize, usize, usize) = (1, i, 0); let mut j = 2; while j * j <= i { if i % j == 0 { let o = i / j; - if (j + o) < (cheapest.0 + cheapest.1) { - cheapest = (j, o, 0); + if (j + o) < (current_best.0 + current_best.1) { + current_best = (j, o, 0); } } @@ -52,16 +60,17 @@ pub fn calculate_optimal_addition( for j in 0..i { let diff = i - j; - let (a, b, c) = previous_best[j]; - if (a + b + c + diff) < (cheapest.0 + cheapest.1 + cheapest.2) { - cheapest = (a, b, c + diff); + let (a, b, c) = best_combinations[j]; + if (a + b + c + diff) < (current_best.0 + current_best.1 + current_best.2) { + current_best = (a, b, c + diff); } } - previous_best.push(cheapest); + best_combinations.push(current_best); } - let (a, b, c) = previous_best.into_iter().last().unwrap(); + assert_eq!(best_combinations.len(), (abs_value as usize) + 1); + let (a, b, c) = best_combinations.into_iter().last().unwrap(); let mut ops = BrainfuckCodeBuilder::new(); ops.head_pos = start_cell; @@ -93,9 +102,9 @@ pub fn calculate_optimal_addition( // compare best solutions - if solution_1.len() < solution_0.len() { + if solution_1.len() < naive_solution.len() { solution_1 } else { - solution_0 + naive_solution } } diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 955aea5..77a8ad1 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -13,6 +13,8 @@ mod parser; mod preprocessor; mod tokeniser; +mod tests; + use brainfuck::{BVMConfig, BVM}; use brainfuck_optimiser::optimise; use builder::{BrainfuckOpcodes, Builder}; diff --git a/compiler/src/mastermind_optimiser.rs b/compiler/src/mastermind_optimiser.rs deleted file mode 100644 index bbb70b8..0000000 --- 
a/compiler/src/mastermind_optimiser.rs +++ /dev/null @@ -1,6 +0,0 @@ -// Here's the brief: -// your task is to take in a list of clauses, and output an optimised version -// the main functionality will be finding when variables are actually used and minimising their lifetime to reduce allocation time - -// the secondary but also very important task is to use the above variable lifetimes to use compiler construction variants that do not unnecessarily copy variables around -// maybe this second point should be in the compiler, not here diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index 7fb4156..1b3c4f2 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -594,8 +594,6 @@ fn parse_brainfuck_clause(clause: &[Token]) -> Result { let mut ops = Vec::new(); let mut j = 0; while j < bf_tokens.len() { - // TODO: support embedded mastermind in the embedded brainfuck - // TODO: combine [-] into clear opcodes match &bf_tokens[j] { Token::Plus => ops.push(ExtendedOpcode::Add), Token::Minus => ops.push(ExtendedOpcode::Subtract), diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index c82b8b8..09e63d1 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -2,9 +2,9 @@ // black box testing #[cfg(test)] -pub mod tests { +pub mod black_box_tests { use crate::{ - brainfuck::{tests::run_code, BVMConfig}, + brainfuck::{bvm_tests::run_code, BVMConfig}, builder::{BrainfuckOpcodes, Builder, Opcode}, compiler::Compiler, parser::parse, diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index a6338ec..9caef6e 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -182,6 +182,7 @@ pub enum Token { UpToken, } +#[cfg(test)] mod tokeniser_tests { use crate::tokeniser::{tokenise, Token}; From ea236889eefe9c9b9a8ad8bbb0a26dd38087ebc6 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Wed, 8 Oct 2025 17:55:09 +1100 Subject: [PATCH 03/56] Add and rename some tokeniser tests --- compiler/src/tokeniser.rs | 35 
++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index 9caef6e..3b103ef 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -186,7 +186,7 @@ pub enum Token { mod tokeniser_tests { use crate::tokeniser::{tokenise, Token}; - fn _character_literal_test(input_str: &str, desired_output: &[Token]) { + fn _tokenisation_test(input_str: &str, desired_output: &[Token]) { let input_string = String::from(input_str); let actual_output = tokenise(&input_string).unwrap(); println!("desired: {desired_output:#?}"); @@ -196,7 +196,7 @@ mod tokeniser_tests { #[test] fn character_literals_1() { - _character_literal_test( + _tokenisation_test( r#"'a' 'b' 'c' ' '"#, &[ Token::Character('a'), @@ -211,7 +211,7 @@ mod tokeniser_tests { #[test] fn character_literals_2() { - _character_literal_test( + _tokenisation_test( r#"'\n'"#, &[ Token::Character('\n'), @@ -223,7 +223,7 @@ mod tokeniser_tests { #[test] fn character_literals_3() { - _character_literal_test( + _tokenisation_test( r#"'"'"#, &[ Token::Character('"'), @@ -235,7 +235,7 @@ mod tokeniser_tests { #[test] fn character_literals_4() { - _character_literal_test( + _tokenisation_test( r#"'\''"#, &[ Token::Character('\''), @@ -244,4 +244,29 @@ mod tokeniser_tests { ], ); } + + #[test] + fn string_literals_1() { + _tokenisation_test( + "\"hello\"", + &[ + Token::String(String::from("hello")), + // TODO: remove this None, fix the code that needs it + Token::None, + ], + ); + } + + #[test] + fn string_literals_() { + _tokenisation_test( + r#""\"" " ""#, + &[ + Token::String(String::from("\"")), + Token::String(String::from(" ")), + // TODO: remove this None, fix the code that needs it + Token::None, + ], + ); + } } From 200db2cefac63bd7b0115f6ab7bd9a0cc4680605 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Fri, 10 Oct 2025 20:55:08 +1100 Subject: [PATCH 04/56] Reimplement character and string tokenisation --- 
compiler/src/tokeniser.rs | 163 ++++++++++++++++++++++++++++++-------- 1 file changed, 132 insertions(+), 31 deletions(-) diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index 3b103ef..58bddc7 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -50,12 +50,10 @@ pub fn tokenise(source: &String) -> Result, String> { ("+", Token::Plus), ]; // check for numbers and variables - let num_re = Regex::new(r#"^[0-9]+"#).unwrap(); - let name_re = Regex::new(r#"^[a-zA-Z_]\w*"#).unwrap(); - // string regex taken from chatgpt - let str_re = Regex::new(r#"^"(?:[^"\\]|\\.)+""#).unwrap(); - // char regex taken from chatgpt again - let chr_re = Regex::new(r#"^'(?:[^'\\]|\\.)'"#).unwrap(); + let number_regex = Regex::new(r#"^[0-9]+"#).unwrap(); + let name_regex = Regex::new(r#"^[a-zA-Z_]\w*"#).unwrap(); + let string_regex = Regex::new(r#"^"(?:[^"\\]|\\.)*""#).unwrap(); + let character_regex = Regex::new(r#"^'(?:[^'\\]|\\.)'"#).unwrap(); let mut tokens: Vec = Vec::new(); @@ -63,45 +61,50 @@ pub fn tokenise(source: &String) -> Result, String> { while chr_idx < stripped.len() { let remaining = &stripped[chr_idx..]; - if let Some(substring) = num_re + if let Some(raw) = number_regex .captures(remaining) .map(|num_capture| String::from(&num_capture[0])) { - chr_idx += substring.len(); - tokens.push(Token::Digits(substring)); - } else if let Some(substring) = name_re + chr_idx += raw.len(); + tokens.push(Token::Digits(raw)); + } else if let Some(raw) = name_regex .captures(remaining) .map(|name_capture| String::from(&name_capture[0])) - .take_if(|substring| { + .take_if(|raw| { mappings .iter() - .find(|(keyword, _)| substring == *keyword) + .find(|(keyword, _)| raw == *keyword) .is_none() }) { - chr_idx += substring.len(); - tokens.push(Token::Name(substring)); - } else if let Some(substring) = str_re + chr_idx += raw.len(); + tokens.push(Token::Name(raw)); + } else if let Some(raw) = string_regex .captures(remaining) .map(|str_capture| 
String::from(&str_capture[0])) { - chr_idx += substring.len(); - let unescaped = serde_json::from_str(&substring) - .or(Err("Could not unescape string literal in tokenisation \ -due to serde error, this should never occur."))?; - tokens.push(Token::String(unescaped)); - } else if let Some(substring) = chr_re + chr_idx += raw.len(); + r_assert!( + raw.len() >= 2, + "Not enough characters in string literal token, \ +this should never occur. {raw:#?}" + ); + let oiasdhfidush = &raw[1..(raw.len() - 1)]; + tokens.push(Token::String(tokenise_raw_string_literal( + &raw[1..(raw.len() - 1)], + )?)); + } else if let Some(raw) = character_regex .captures(remaining) .map(|chr_capture| String::from(&chr_capture[0])) { - chr_idx += substring.len(); - // hack: replace single quotes with double quotes, then use serde to unescape all the characters - let escaped_string = String::new() + "\"" + &substring[1..(substring.len() - 1)] + "\""; - let unescaped: String = serde_json::from_str(&escaped_string) - .or(Err("Could not unescape character literal in tokenisation \ -due to serde error, this should never occur."))?; - - r_assert!(unescaped.len() == 1, "Character literals must be length 1"); - tokens.push(Token::Character(unescaped.chars().next().unwrap())); + chr_idx += raw.len(); + r_assert!( + raw.len() >= 2, + "Not enough characters in character literal token, \ +this should never occur. 
{raw:#?}" + ); + tokens.push(Token::Character(tokenise_raw_character_literal( + &raw[1..(raw.len() - 1)], + )?)); } else if let Some((text, token)) = mappings .iter() .find(|(text, _)| remaining.starts_with(text)) @@ -137,6 +140,53 @@ fn strip_line(line: &str) -> String { .join(" ") } +/// handle character escape sequences +// supports Rust ASCII escapes +fn tokenise_raw_character_literal(raw: &str) -> Result { + let mut s_iter = raw.chars(); + Ok(match s_iter.next() { + Some('\\') => match s_iter.next() { + Some(c) => match c { + '\'' => '\'', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + '0' => '\0', + _ => r_panic!("Invalid escape sequence in character literal: {raw}"), + }, + None => r_panic!("Expected escape sequence in character literal: {raw}"), + }, + Some(first_char) => first_char, + None => r_panic!("Character literal must be length 1: {raw}"), + }) +} + +/// handle string escape sequences +// supports Rust ASCII escapes +fn tokenise_raw_string_literal(raw: &str) -> Result { + let mut s_iter = raw.chars(); + let mut built_string = String::with_capacity(raw.len()); + while let Some(raw_char) = s_iter.next() { + built_string.push(match raw_char { + '\\' => match s_iter.next() { + Some(c) => match c { + '\"' => '"', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + '0' => '\0', + _ => r_panic!("Invalid escape sequence in string literal: {raw}"), + }, + None => r_panic!("Expected escape sequence in string literal: {raw}"), + }, + c => c, + }); + } + Ok(built_string) +} + #[derive(Debug, Clone, PartialEq)] pub enum Token { None, @@ -245,6 +295,32 @@ mod tokeniser_tests { ); } + #[test] + #[should_panic] + fn character_literals_5() { + _tokenisation_test( + r#"'\'"#, + &[ + Token::Character('\\'), + // TODO: remove this None, fix the code that needs it + Token::None, + ], + ); + } + + #[test] + #[should_panic] + fn character_literals_6() { + _tokenisation_test( + r#"'aa'"#, + &[ + Token::String(String::from("aa")), + // TODO: 
remove this None, fix the code that needs it + Token::None, + ], + ); + } + #[test] fn string_literals_1() { _tokenisation_test( @@ -258,7 +334,32 @@ mod tokeniser_tests { } #[test] - fn string_literals_() { + fn string_literals_2() { + _tokenisation_test( + r#""""#, + &[ + Token::String(String::from("")), + // TODO: remove this None, fix the code that needs it + Token::None, + ], + ); + } + + #[test] + fn string_literals_2a() { + _tokenisation_test( + r#""""""#, + &[ + Token::String(String::from("")), + Token::String(String::from("")), + // TODO: remove this None, fix the code that needs it + Token::None, + ], + ); + } + + #[test] + fn string_literals_3() { _tokenisation_test( r#""\"" " ""#, &[ From aeec20bf67175fbe3047abf7f237af32b2aa769d Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Sun, 12 Oct 2025 01:28:33 +1100 Subject: [PATCH 05/56] Add explicit tape cell type and change existing code to suit --- compiler/src/{builder.rs => backend.rs} | 140 ++++++++++++---------- compiler/src/brainfuck_optimiser.rs | 4 +- compiler/src/constants_optimiser.rs | 14 +-- compiler/src/{compiler.rs => frontend.rs} | 140 ++++++++++------------ compiler/src/lib.rs | 27 ++--- compiler/src/main.rs | 39 +++--- compiler/src/misc.rs | 4 + compiler/src/parser.rs | 114 +++++++++--------- compiler/src/tests.rs | 38 ++---- 9 files changed, 250 insertions(+), 270 deletions(-) rename compiler/src/{builder.rs => backend.rs} (84%) rename compiler/src/{compiler.rs => frontend.rs} (94%) diff --git a/compiler/src/builder.rs b/compiler/src/backend.rs similarity index 84% rename from compiler/src/builder.rs rename to compiler/src/backend.rs index 8e7318c..58fb4e1 100644 --- a/compiler/src/builder.rs +++ b/compiler/src/backend.rs @@ -7,38 +7,46 @@ use std::{ collections::{HashMap, HashSet}, + fmt::Display, num::Wrapping, }; use crate::{ - compiler::{CellLocation, Instruction, MemoryId}, constants_optimiser::calculate_optimal_addition, + frontend::{CellLocation, Instruction, MemoryId}, 
macros::macros::{r_assert, r_panic}, - MastermindConfig, + misc::MastermindContext, }; -pub struct Builder<'a> { - pub config: &'a MastermindConfig, -} - type LoopDepth = usize; -pub type TapeCell = (i32, i32); type TapeValue = u8; -impl Builder<'_> { - pub fn build( +#[derive(PartialEq, Clone, Hash, Eq, Copy)] +pub struct TapeCell2D(pub i32, pub i32); + +impl Display for TapeCell2D { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("({},{})", self.0, self.1))?; + Ok(()) + } +} + +impl MastermindContext<'_> { + pub fn ir_to_bf( &self, - instructions: Vec, + instructions: Vec>, return_to_origin: bool, ) -> Result, String> { let mut allocator = CellAllocator::new(); - let mut alloc_map: HashMap>)> = - HashMap::new(); + let mut alloc_map: HashMap< + MemoryId, + (TapeCell2D, usize, LoopDepth, Vec>), + > = HashMap::new(); - let mut loop_stack: Vec = Vec::new(); + let mut loop_stack: Vec = Vec::new(); let mut current_loop_depth: LoopDepth = 0; let mut skipped_loop_depth: Option = None; - let mut ops = BrainfuckCodeBuilder::new(); + let mut ops = BFBuilder::new(); for instruction in instructions { if let Some(depth) = skipped_loop_depth { @@ -138,7 +146,7 @@ outside of loop it was allocated" mem_idx < *size, "Attempted to access memory outside of allocation" ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); + let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); let known_value = &mut known_values[mem_idx]; let mut open = true; @@ -175,7 +183,7 @@ outside of loop it was allocated" mem_idx < *size, "Attempted to access memory outside of allocation" ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); + let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); let known_value = &mut known_values[mem_idx]; let Some(stack_cell) = loop_stack.pop() else { @@ -206,7 +214,7 @@ outside of loop it was allocated" mem_idx < *size, "Attempted to access memory outside of allocation" ); - let cell = 
(cell_base.0 + mem_idx as i32, cell_base.1); + let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); let known_value = &mut known_values[mem_idx]; // TODO: fix bug, if only one multiplication then we can have a value already in the cell, but never otherwise @@ -219,7 +227,7 @@ outside of loop it was allocated" // TODO: instead find the nearest zero cell, doesn't matter if allocated or not let temp_cell = allocator.allocate_temp_cell(cell); - let optimised_ops: BrainfuckCodeBuilder = + let optimised_ops: BFBuilder = calculate_optimal_addition(imm as i8, ops.head_pos, cell, temp_cell); ops.head_pos = optimised_ops.head_pos; @@ -254,7 +262,7 @@ outside of loop it was allocated" mem_idx < *size, "Attempted to access memory outside of allocation" ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); + let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); let known_value = &mut known_values[mem_idx]; ops.move_to_cell(cell); @@ -275,7 +283,7 @@ outside of loop it was allocated" mem_idx < *size, "Attempted to access memory outside of allocation" ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); + let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); let known_value = &mut known_values[mem_idx]; ops.move_to_cell(cell); @@ -323,7 +331,7 @@ outside of loop it was allocated" mem_idx < *size, "Attempted to access memory outside of allocation" ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); + let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); ops.move_to_cell(cell); ops.push(Opcode::Output); @@ -343,7 +351,7 @@ outside of loop it was allocated" mem_idx < *size, "Attempted to access memory outside of allocation" ); - let cell = (cell_base.0 + mem_idx as i32, cell_base.1); + let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); ops.move_to_cell(cell); } CellLocation::Unspecified => (), @@ -357,7 +365,7 @@ outside of loop it was allocated" // this is used in embedded brainfuck contexts to 
preserve head position if return_to_origin { - ops.move_to_cell((0, 0)); + ops.move_to_cell(TapeCell2D(0, 0)); } Ok(ops.opcodes) @@ -365,7 +373,7 @@ outside of loop it was allocated" } struct CellAllocator { - alloc_map: HashSet, + alloc_map: HashSet, } // allocator will not automatically allocate negative-index cells @@ -378,11 +386,11 @@ impl CellAllocator { } /// Checks if the memory size can be allocated to the right of a given location e.g. arrays - fn check_allocatable(&mut self, location: &TapeCell, size: usize) -> bool { + fn check_allocatable(&mut self, location: &TapeCell2D, size: usize) -> bool { for k in 0..size { if self .alloc_map - .contains(&(location.0 + k as i32, location.1)) + .contains(&TapeCell2D(location.0 + k as i32, location.1)) { return false; } @@ -395,11 +403,11 @@ impl CellAllocator { /// Uses a variety of memory allocation methods based on settings fn allocate( &mut self, - location: Option, + location: Option, size: usize, method: u8, - ) -> Result { - let mut region_start = location.unwrap_or((0, 0)); + ) -> Result { + let mut region_start = location.unwrap_or(TapeCell2D(0, 0)); //Check specified memory allocation above to ensure that this works nicely with all algorithms if let Some(l) = location { if !self.check_allocatable(&l, size) { @@ -413,8 +421,8 @@ impl CellAllocator { // should the region start at the current tape head? if method == 0 { for i in region_start.0.. 
{ - if self.alloc_map.contains(&(i, region_start.1)) { - region_start = (i + 1, region_start.1); + if self.alloc_map.contains(&TapeCell2D(i, region_start.1)) { + region_start = TapeCell2D(i + 1, region_start.1); } else if i - region_start.0 == (size as i32 - 1) { break; } @@ -429,9 +437,9 @@ impl CellAllocator { i = region_start.0 + loops; j = region_start.1; for _ in 0..=loops { - if self.check_allocatable(&(i, j), size) { + if self.check_allocatable(&TapeCell2D(i, j), size) { found = true; - region_start = (i, j); + region_start = TapeCell2D(i, j); break; } i = i - 1; @@ -452,9 +460,9 @@ impl CellAllocator { 'N' => { for _ in 0..loops { j += 1; - if self.check_allocatable(&(i, j), size) { + if self.check_allocatable(&TapeCell2D(i, j), size) { found = true; - region_start = (i, j); + region_start = TapeCell2D(i, j); break; } } @@ -462,9 +470,9 @@ impl CellAllocator { 'E' => { for _ in 0..loops { i += 1; - if self.check_allocatable(&(i, j), size) { + if self.check_allocatable(&TapeCell2D(i, j), size) { found = true; - region_start = (i, j); + region_start = TapeCell2D(i, j); break; } } @@ -472,9 +480,9 @@ impl CellAllocator { 'S' => { for _ in 0..loops { j -= 1; - if self.check_allocatable(&(i, j), size) { + if self.check_allocatable(&TapeCell2D(i, j), size) { found = true; - region_start = (i, j); + region_start = TapeCell2D(i, j); break; } } @@ -482,9 +490,9 @@ impl CellAllocator { 'W' => { for _ in 0..loops { i -= 1; - if self.check_allocatable(&(i, j), size) { + if self.check_allocatable(&TapeCell2D(i, j), size) { found = true; - region_start = (i, j); + region_start = TapeCell2D(i, j); break; } } @@ -509,11 +517,12 @@ impl CellAllocator { while !found { for i in -loops..=loops { for j in -loops..=loops { - if self - .check_allocatable(&(region_start.0 + i, region_start.1 + j), size) - { + if self.check_allocatable( + &TapeCell2D(region_start.0 + i, region_start.1 + j), + size, + ) { found = true; - region_start = (region_start.0 + i, region_start.1 + j); + 
region_start = TapeCell2D(region_start.0 + i, region_start.1 + j); break; } } @@ -530,8 +539,8 @@ impl CellAllocator { // make all cells in the specified region allocated for i in region_start.0..(region_start.0 + size as i32) { - if !self.alloc_map.contains(&(i, region_start.1)) { - self.alloc_map.insert((i, region_start.1)); + if !self.alloc_map.contains(&TapeCell2D(i, region_start.1)) { + self.alloc_map.insert(TapeCell2D(i, region_start.1)); } } @@ -541,7 +550,7 @@ impl CellAllocator { // allocate but start looking close to the given cell, used for optimising constants as you need an extra cell to multiply // again not sure if this stuff should be in the builder step or the compiler step ? This seems the simplest for now // but I'm wary that complex systems often evolve from simple ones, and any optimisations introduce complexity - fn allocate_temp_cell(&mut self, location: TapeCell) -> TapeCell { + fn allocate_temp_cell(&mut self, location: TapeCell2D) -> TapeCell2D { // this will allocate the given cell if unallocated so beware if self.alloc_map.insert(location) { return location; @@ -554,29 +563,28 @@ impl CellAllocator { loop { if let Some(i) = left_iter.next() { // unallocated cell, allocate it and return - if self.alloc_map.insert((i, location.1)) { - return (i, location.1); - } else { + if self.alloc_map.insert(TapeCell2D(i, location.1)) { + return TapeCell2D(i, location.1); } } if let Some(i) = right_iter.next() { - if self.alloc_map.insert((i, location.1)) { - return (i, location.1); + if self.alloc_map.insert(TapeCell2D(i, location.1)) { + return TapeCell2D(i, location.1); } } } } - fn free(&mut self, cell: TapeCell, size: usize) -> Result<(), String> { + fn free(&mut self, cell: TapeCell2D, size: usize) -> Result<(), String> { for i in cell.0..(cell.0 + size as i32) { r_assert!( - self.alloc_map.contains(&(i, cell.1)), + self.alloc_map.contains(&TapeCell2D(i, cell.1)), "Cannot free cell @{0},{1} as it is not allocated.", i, cell.1 ); - 
self.alloc_map.remove(&(i, cell.1)); + self.alloc_map.remove(&TapeCell2D(i, cell.1)); } Ok(()) @@ -598,9 +606,9 @@ pub enum Opcode { Down, } -pub struct BrainfuckCodeBuilder { +pub struct BFBuilder { opcodes: Vec, - pub head_pos: TapeCell, + pub head_pos: TapeCell2D, } pub trait BrainfuckOpcodes { @@ -659,24 +667,24 @@ impl BrainfuckOpcodes for Vec { } } -impl BrainfuckOpcodes for BrainfuckCodeBuilder { +impl BrainfuckOpcodes for BFBuilder { fn to_string(self) -> String { self.opcodes.to_string() } fn from_str(s: &str) -> Self { - BrainfuckCodeBuilder { + BFBuilder { opcodes: BrainfuckOpcodes::from_str(s), - head_pos: (0, 0), + head_pos: TapeCell2D(0, 0), } } } -impl BrainfuckCodeBuilder { - pub fn new() -> BrainfuckCodeBuilder { - BrainfuckCodeBuilder { +impl BFBuilder { + pub fn new() -> BFBuilder { + BFBuilder { opcodes: Vec::new(), - head_pos: (0, 0), + head_pos: TapeCell2D(0, 0), } } pub fn len(&self) -> usize { @@ -691,7 +699,7 @@ impl BrainfuckCodeBuilder { { self.opcodes.extend(ops); } - pub fn move_to_cell(&mut self, cell: TapeCell) { + pub fn move_to_cell(&mut self, cell: TapeCell2D) { let x = cell.0; let y = cell.1; let x_pos = self.head_pos.0; diff --git a/compiler/src/brainfuck_optimiser.rs b/compiler/src/brainfuck_optimiser.rs index 97e6af5..e623e21 100644 --- a/compiler/src/brainfuck_optimiser.rs +++ b/compiler/src/brainfuck_optimiser.rs @@ -1,4 +1,4 @@ -use crate::builder::Opcode; +use crate::backend::Opcode; use itertools::Itertools; use std::{collections::HashMap, num::Wrapping}; @@ -206,7 +206,7 @@ fn optimise_subset(run: Vec, exhaustive: bool) -> Vec { #[cfg(test)] mod bf_optimiser_tests { - use crate::builder::BrainfuckOpcodes; + use crate::backend::BrainfuckOpcodes; use super::*; diff --git a/compiler/src/constants_optimiser.rs b/compiler/src/constants_optimiser.rs index 63097f8..aa67756 100644 --- a/compiler/src/constants_optimiser.rs +++ b/compiler/src/constants_optimiser.rs @@ -1,5 +1,5 @@ // TODO: make unit tests for this -use 
crate::builder::{BrainfuckCodeBuilder, Opcode, TapeCell}; +use crate::backend::{BFBuilder, Opcode, TapeCell2D}; // basically, most ascii characters are large numbers, which are more efficient to calculate with multiplication than with a bunch of + or - // an optimising brainfuck runtime will prefer a long string of +++++ or ----- however the goal of mastermind is to be used for code golf, which is not about speed @@ -10,17 +10,17 @@ use crate::builder::{BrainfuckCodeBuilder, Opcode, TapeCell}; // 5 * 5 * 7 : +++++[>+++++<-]>[<+++++++>-]< pub fn calculate_optimal_addition( value: i8, - start_cell: TapeCell, - target_cell: TapeCell, - temp_cell: TapeCell, -) -> BrainfuckCodeBuilder { + start_cell: TapeCell2D, + target_cell: TapeCell2D, + temp_cell: TapeCell2D, +) -> BFBuilder { // can't abs() i8 directly because there is no +128i8, so abs(-128i8) crashes let abs_value = (value as i32).abs(); // STAGE 0: // for efficiency's sake, calculate the cost of just adding the constant to the cell let naive_solution = { - let mut ops = BrainfuckCodeBuilder::new(); + let mut ops = BFBuilder::new(); ops.head_pos = start_cell; ops.move_to_cell(target_cell); ops.add_to_current_cell(value); @@ -71,7 +71,7 @@ pub fn calculate_optimal_addition( assert_eq!(best_combinations.len(), (abs_value as usize) + 1); let (a, b, c) = best_combinations.into_iter().last().unwrap(); - let mut ops = BrainfuckCodeBuilder::new(); + let mut ops = BFBuilder::new(); ops.head_pos = start_cell; ops.move_to_cell(temp_cell); diff --git a/compiler/src/compiler.rs b/compiler/src/frontend.rs similarity index 94% rename from compiler/src/compiler.rs rename to compiler/src/frontend.rs index 2ed69f3..e106f6c 100644 --- a/compiler/src/compiler.rs +++ b/compiler/src/frontend.rs @@ -3,8 +3,9 @@ use std::{collections::HashMap, iter::zip}; use crate::{ - builder::{Builder, Opcode, TapeCell}, + backend::{Opcode, TapeCell2D}, macros::macros::{r_assert, r_panic}, + misc::MastermindContext, parser::{ Clause, Expression, 
ExtendedOpcode, LocationSpecifier, Reference, VariableDefinition, VariableTarget, VariableTargetReferenceChain, VariableTypeReference, @@ -14,25 +15,21 @@ use crate::{ // memory stuff is all WIP and some comments may be incorrect -pub struct Compiler<'a> { - pub config: &'a MastermindConfig, -} - -impl Compiler<'_> { - pub fn compile<'a>( +impl MastermindContext<'_> { + pub fn create_ir_scope<'a>( &'a self, - clauses: &[Clause], - outer_scope: Option<&'a Scope>, - ) -> Result, String> { + clauses: &[Clause], + outer_scope: Option<&'a ScopeBuilder>, + ) -> Result, String> { let mut scope = if let Some(outer) = outer_scope { outer.open_inner() } else { - Scope::new() + ScopeBuilder::new() }; // TODO: fix unnecessary clones, and reimplement this with iterators somehow // hoist structs, then functions to top - let mut filtered_clauses_1: Vec = vec![]; + let mut filtered_clauses_1: Vec> = vec![]; // first stage: structs (these need to be defined before functions, so they can be used as arguments) for clause in clauses { match clause { @@ -43,7 +40,7 @@ impl Compiler<'_> { } } // second stage: functions - let mut filtered_clauses_2: Vec = vec![]; + let mut filtered_clauses_2: Vec> = vec![]; for clause in filtered_clauses_1 { match clause { Clause::DefineFunction { @@ -317,10 +314,8 @@ impl Compiler<'_> { // recursively compile instructions // TODO: when recursively compiling, check which things changed based on a return info value - let loop_scope = self.compile(&block, Some(&scope))?; - scope - .instructions - .extend(loop_scope.finalise_instructions(true)); + let loop_scope = self.create_ir_scope(&block, Some(&scope))?; + scope.instructions.extend(loop_scope.build_ir(true)); // close the loop scope.push_instruction(Instruction::CloseLoop(cell)); @@ -372,11 +367,9 @@ impl Compiler<'_> { scope.push_instruction(Instruction::OpenLoop(source_cell)); // recurse - let loop_scope = self.compile(&block, Some(&scope))?; + let loop_scope = self.create_ir_scope(&block, 
Some(&scope))?; // TODO: refactor, make a function in scope trait to do this automatically - scope - .instructions - .extend(loop_scope.finalise_instructions(true)); + scope.instructions.extend(loop_scope.build_ir(true)); // copy into each target and decrement the source for target in targets { @@ -456,10 +449,8 @@ impl Compiler<'_> { // recursively compile if block if let Some(block) = if_block { - let if_scope = self.compile(&block, Some(&new_scope))?; - new_scope - .instructions - .extend(if_scope.finalise_instructions(true)); + let if_scope = self.create_ir_scope(&block, Some(&new_scope))?; + new_scope.instructions.extend(if_scope.build_ir(true)); }; // close if block @@ -475,25 +466,19 @@ impl Compiler<'_> { // recursively compile else block // TODO: fix this bad practice unwrap let block = else_block.unwrap(); - let else_scope = self.compile(&block, Some(&new_scope))?; - new_scope - .instructions - .extend(else_scope.finalise_instructions(true)); + let else_scope = self.create_ir_scope(&block, Some(&new_scope))?; + new_scope.instructions.extend(else_scope.build_ir(true)); new_scope.push_instruction(Instruction::CloseLoop(cell)); new_scope.push_instruction(Instruction::Free(cell.memory_id)); } // extend the inner scopes instructions onto the outer one - scope - .instructions - .extend(new_scope.finalise_instructions(true)); + scope.instructions.extend(new_scope.build_ir(true)); } Clause::Block(clauses) => { - let new_scope = self.compile(&clauses, Some(&scope))?; - scope - .instructions - .extend(new_scope.finalise_instructions(true)); + let new_scope = self.create_ir_scope(&clauses, Some(&scope))?; + scope.instructions.extend(new_scope.build_ir(true)); } Clause::InlineBrainfuck { location_specifier, @@ -509,18 +494,15 @@ impl Compiler<'_> { let functions_scope = scope.open_inner_templates_only(); // compile the block and extend the operations - let compiler = Compiler { + let ctx = MastermindContext { config: &self.config, }; - let instructions = compiler - 
.compile(&mm_clauses, Some(&functions_scope))? - .finalise_instructions(false); + let instructions = ctx + .create_ir_scope(&mm_clauses, Some(&functions_scope))? + .build_ir(false); // compile without cleaning up top level variables, this is the brainfuck programmer's responsibility // it is also the brainfuck programmer's responsibility to return to the start position - let builder = Builder { - config: &self.config, - }; - let built_code = builder.build(instructions, true)?; + let built_code = ctx.ir_to_bf(instructions, true)?; expanded_bf.extend(built_code); } ExtendedOpcode::Add => expanded_bf.push(Opcode::Add), @@ -599,19 +581,19 @@ impl Compiler<'_> { } // recurse - let function_scope = self.compile( + let function_scope = self.create_ir_scope( &function_definition.block, Some(&argument_translation_scope), )?; argument_translation_scope .instructions - .extend(function_scope.finalise_instructions(true)); + .extend(function_scope.build_ir(true)); // extend the inner scope instructions onto the outer scope // maybe function call compiling should be its own function? 
scope .instructions - .extend(argument_translation_scope.finalise_instructions(false)); + .extend(argument_translation_scope.build_ir(false)); } Clause::DefineStruct { name: _, fields: _ } | Clause::DefineFunction { @@ -630,7 +612,7 @@ impl Compiler<'_> { // helper function for a common use-case // flatten an expression and add it to a specific cell (using copies and adds, etc) fn _add_expr_to_cell( - scope: &mut Scope, + scope: &mut ScopeBuilder, expr: &Expression, cell: CellReference, ) -> Result<(), String> { @@ -659,7 +641,7 @@ fn _add_expr_to_cell( //This function allows you to add a self referencing expression to the cell //Separate this to ensure that normal expression don't require the overhead of copying fn _add_self_referencing_expr_to_cell( - scope: &mut Scope, + scope: &mut ScopeBuilder, expr: Expression, cell: CellReference, pre_clear: bool, @@ -716,7 +698,7 @@ fn _add_self_referencing_expr_to_cell( /// Helper function to copy a cell from one to another leaving the original unaffected // TODO: make one for draining a cell fn _copy_cell( - scope: &mut Scope, + scope: &mut ScopeBuilder, source_cell: CellReference, target_cell: CellReference, constant: i32, @@ -750,7 +732,7 @@ fn _copy_cell( // this is subject to change #[derive(Debug, Clone)] -pub enum Instruction { +pub enum Instruction { Allocate(Memory, Option), Free(MemoryId), // the number indicates which cell in the allocation stack should be freed (cell 0, is the top of the stack, 1 is the second element, etc) OpenLoop(CellReference), // same with other numbers here, they indicate the cell in the allocation stack to use in the instruction @@ -760,14 +742,14 @@ pub enum Instruction { ClearCell(CellReference), // not sure if this should be here, seems common enough that it should be AssertCellValue(CellReference, Option), // allows the user to hand-tune optimisations further OutputCell(CellReference), - InsertBrainfuckAtCell(Vec, CellLocation), + InsertBrainfuckAtCell(Vec, CellLocation), } 
#[derive(Debug, Clone)] /// Either a fixed constant cell or a reference to some existing memory -pub enum CellLocation { +pub enum CellLocation { Unspecified, - FixedCell((i32, i32)), + FixedCell(TapeCell), MemoryCell(CellReference), } @@ -830,9 +812,9 @@ impl Memory { #[derive(Clone, Debug)] /// Scope type represents a Mastermind code block, /// any variables or functions defined within a {block} are owned by the scope and cleaned up before continuing -pub struct Scope<'a> { +pub struct ScopeBuilder<'a, TapeCell> { /// a reference to the parent scope, for accessing things defined outside of this scope - outer_scope: Option<&'a Scope<'a>>, + outer_scope: Option<&'a ScopeBuilder<'a, TapeCell>>, /// fn_only: true if syntactic context instead of normal context. /// Used for embedded mm so that the inner mm can use outer functions but not variables. types_only: bool, @@ -844,18 +826,18 @@ pub struct Scope<'a> { variable_memory: HashMap, /// Functions accessible by any code within or in the current scope - functions: Vec<(String, Vec<(String, ValueType)>, Vec)>, + functions: Vec<(String, Vec<(String, ValueType)>, Vec>)>, /// Struct types definitions structs: HashMap, /// Intermediate instructions generated by the compiler - instructions: Vec, + instructions: Vec>, } #[derive(Clone, Debug)] // probably shouldn't be cloning here but whatever -struct Function { +struct Function { arguments: Vec<(String, ValueType)>, - block: Vec, + block: Vec>, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -989,9 +971,9 @@ impl ValueType { } } -impl Scope<'_> { - pub fn new() -> Scope<'static> { - Scope { +impl ScopeBuilder<'_, TapeCell2D> { + pub fn new() -> ScopeBuilder<'static, TapeCell2D> { + ScopeBuilder { outer_scope: None, types_only: false, allocations: 0, @@ -1004,7 +986,7 @@ impl Scope<'_> { // I don't love this system of deciding what to clean up at the end in this specific function, but I'm not sure what the best way to achieve this would be // this used to be called 
"get_instructions" but I think this more implies things are being modified - pub fn finalise_instructions(mut self, clean_up_variables: bool) -> Vec { + pub fn build_ir(mut self, clean_up_variables: bool) -> Vec> { if !clean_up_variables { return self.instructions; } @@ -1047,13 +1029,13 @@ impl Scope<'_> { self.instructions } - fn push_instruction(&mut self, instruction: Instruction) { + fn push_instruction(&mut self, instruction: Instruction) { self.instructions.push(instruction); } /// Open a scope within the current one, any time there is a {} in Mastermind, this is called - fn open_inner(&self) -> Scope { - Scope { + fn open_inner(&self) -> ScopeBuilder { + ScopeBuilder { outer_scope: Some(self), types_only: false, allocations: 0, @@ -1066,8 +1048,8 @@ impl Scope<'_> { // syntactic context instead of normal context // used for embedded mm so that the inner mm can use outer functions - fn open_inner_templates_only(&self) -> Scope { - Scope { + fn open_inner_templates_only(&self) -> ScopeBuilder { + ScopeBuilder { outer_scope: Some(self), types_only: true, allocations: 0, @@ -1079,7 +1061,10 @@ impl Scope<'_> { } /// Get the correct variable type and allocate the right amount of cells for it - fn allocate_variable(&mut self, var: VariableDefinition) -> Result<&ValueType, String> { + fn allocate_variable( + &mut self, + var: VariableDefinition, + ) -> Result<&ValueType, String> { r_assert!( !self.variable_memory.contains_key(&var.name), "Cannot allocate variable {var} twice in the same scope" @@ -1148,7 +1133,7 @@ impl Scope<'_> { &self, calling_name: &str, calling_arg_types: &Vec<&ValueType>, - ) -> Result { + ) -> Result, String> { // this function is unaffected by the self.fn_only flag Ok( if let Some(func) = self.functions.iter().find(|(name, args, _)| { @@ -1163,8 +1148,11 @@ impl Scope<'_> { true }) { // TODO: stop cloning! 
This function overload stuff is tacked on and needs refactoring - let (_, arguments, block) = func.clone(); - Function { arguments, block } + let (_, arguments, block) = func; + Function { + arguments: arguments.clone(), + block: block.clone(), + } } else if let Some(outer_scope) = self.outer_scope { outer_scope.get_function(calling_name, calling_arg_types)? } else { @@ -1177,7 +1165,7 @@ impl Scope<'_> { fn register_struct_definition( &mut self, struct_name: &str, - fields: Vec, + fields: Vec>, ) -> Result<(), String> { let mut absolute_fields = vec![]; @@ -1213,8 +1201,8 @@ impl Scope<'_> { fn register_function_definition( &mut self, new_function_name: &str, - new_arguments: Vec, - new_block: Vec, + new_arguments: Vec>, + new_block: Vec>, ) -> Result<(), String> { let absolute_arguments = new_arguments .into_iter() diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 77a8ad1..5efc698 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -3,11 +3,11 @@ mod macros; // allowing dead code because we have two different compile targets (wasm and command-line) +mod backend; mod brainfuck; mod brainfuck_optimiser; -mod builder; -mod compiler; mod constants_optimiser; +mod frontend; mod misc; mod parser; mod preprocessor; @@ -15,10 +15,9 @@ mod tokeniser; mod tests; +use backend::BrainfuckOpcodes; use brainfuck::{BVMConfig, BVM}; use brainfuck_optimiser::optimise; -use builder::{BrainfuckOpcodes, Builder}; -use compiler::Compiler; use misc::MastermindConfig; use parser::parse; use preprocessor::preprocess_from_memory; @@ -28,14 +27,11 @@ use std::collections::HashMap; use wasm_bindgen::{prelude::wasm_bindgen, JsValue}; -// copied from rustwasm.github.io +use crate::misc::MastermindContext; + pub fn set_panic_hook() { - // When the `console_error_panic_hook` feature is enabled, we can call the - // `set_panic_hook` function at least once during initialization, and then - // we will get better error messages if our code ever panics. 
- // - // For more details see - // https://github.com/rustwasm/console_error_panic_hook#readme + // copied from rustwasm.github.io + // https://github.com/rustwasm/console_error_panic_hook #[cfg(feature = "console_error_panic_hook")] console_error_panic_hook::set_once(); } @@ -51,14 +47,13 @@ pub fn wasm_compile( let file_contents: HashMap = serde_wasm_bindgen::from_value(file_contents).unwrap(); let config: MastermindConfig = serde_wasm_bindgen::from_value(config).unwrap(); - let compiler = Compiler { config: &config }; - let builder = Builder { config: &config }; + let ctx = MastermindContext { config: &config }; let preprocessed_file = preprocess_from_memory(&file_contents, entry_file_name)?; let tokens = tokenise(&preprocessed_file)?; - let parsed = parse(&tokens)?; - let instructions = compiler.compile(&parsed, None)?; - let bf_code = builder.build(instructions.finalise_instructions(false), false)?; + let parsed_syntax = parse(&tokens)?; + let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); + let bf_code = ctx.ir_to_bf(instructions, false)?; Ok(match config.optimise_generated_code { true => optimise(bf_code, config.optimise_generated_all_permutations).to_string(), diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 9b38a0f..d817531 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -2,23 +2,21 @@ mod macros; -// Stages: (rust format has jumbled these) -mod brainfuck; // 6. Run -mod brainfuck_optimiser; // 5. Post-Optimise -mod builder; // 4. Build (and pre-optimise) -mod compiler; // 3. Compile -mod constants_optimiser; // a component of 4 -mod parser; // 2. Parse -mod preprocessor; // 0. Preprocess includes and macro-type stuff -mod tokeniser; // 1. 
Tokenise +mod backend; +mod brainfuck; +mod brainfuck_optimiser; +mod constants_optimiser; +mod frontend; +mod parser; +mod preprocessor; +mod tokeniser; mod misc; mod tests; +use backend::BrainfuckOpcodes; use brainfuck::{BVMConfig, BVM}; use brainfuck_optimiser::optimise; -use builder::{BrainfuckOpcodes, Builder}; -use compiler::Compiler; use misc::MastermindConfig; use parser::parse; use preprocessor::preprocess; @@ -28,6 +26,8 @@ use std::io::{stdin, stdout, Cursor}; use clap::Parser; +use crate::misc::MastermindContext; + #[derive(Parser, Default, Debug)] #[command(author = "Heathcorp", version = "0.1", about = "Mastermind: the Brainfuck interpreter and compilation tool", long_about = None)] struct Arguments { @@ -74,7 +74,9 @@ fn main() -> Result<(), String> { let args = Arguments::parse(); + // TODO: change this to not be a bitmask, or at least document it let config = MastermindConfig::new(args.optimise); + let ctx = MastermindContext { config: &config }; let program; match args.file { @@ -101,19 +103,12 @@ fn main() -> Result<(), String> { // 2 stage compilation step, first stage compiles syntax tree into low-level instructions // second stage actually writes out the low-level instructions into brainfuck - let compiler = Compiler { config: &config }; - let instructions = compiler - .compile(&clauses, None)? 
- .finalise_instructions(false); - - let builder = Builder { config: &config }; - let bf_program = builder.build(instructions, false)?; + let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); + let bf_code = ctx.ir_to_bf(instructions, false)?; match config.optimise_generated_code { - true => { - optimise(bf_program, config.optimise_generated_all_permutations).to_string() - } - false => bf_program.to_string(), + true => optimise(bf_code, config.optimise_generated_all_permutations).to_string(), + false => bf_code.to_string(), } } false => program, diff --git a/compiler/src/misc.rs b/compiler/src/misc.rs index b61e3bf..bf1430c 100644 --- a/compiler/src/misc.rs +++ b/compiler/src/misc.rs @@ -43,3 +43,7 @@ impl MastermindConfig { } } } + +pub struct MastermindContext<'a> { + pub config: &'a MastermindConfig, +} diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index 1b3c4f2..fb16205 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -1,15 +1,15 @@ use crate::{ - builder::TapeCell, + backend::TapeCell2D, macros::macros::{r_assert, r_panic}, tokeniser::Token, }; use std::{fmt::Display, mem::discriminant, num::Wrapping}; // recursive function to create a tree representation of the program -pub fn parse(tokens: &[Token]) -> Result, String> { +pub fn parse(tokens: &[Token]) -> Result>, String> { // basic steps: // chew off tokens from the front, recursively parse blocks of tokens - let mut clauses: Vec = Vec::new(); + let mut clauses = Vec::new(); let mut i = 0usize; while let Some(clause) = get_clause_tokens(&tokens[i..])? 
{ match ( @@ -110,7 +110,7 @@ pub fn parse(tokens: &[Token]) -> Result, String> { Ok(clauses) } -fn parse_let_clause(clause: &[Token]) -> Result { +fn parse_let_clause(clause: &[Token]) -> Result, String> { // cell x = 0; // struct DummyStruct y let mut i = 0usize; @@ -134,7 +134,7 @@ fn parse_let_clause(clause: &[Token]) -> Result { } /// Parse tokens representing a struct definition into a clause -fn parse_struct_clause(clause: &[Token]) -> Result { +fn parse_struct_clause(clause: &[Token]) -> Result, String> { let mut i = 0usize; let Token::Struct = &clause[i] else { r_panic!("Expected struct keyword in struct clause. This should never occur. {clause:#?}"); @@ -182,8 +182,8 @@ fn parse_struct_clause(clause: &[Token]) -> Result { }) } -fn parse_add_clause(clause: &[Token]) -> Result, String> { - let mut clauses: Vec = Vec::new(); +fn parse_add_clause(clause: &[Token]) -> Result>, String> { + let mut clauses: Vec> = Vec::new(); let mut i = 0usize; let (var, len) = parse_var_target(&clause[i..])?; i += len; @@ -217,7 +217,7 @@ fn parse_add_clause(clause: &[Token]) -> Result, String> { } // currently just syntax sugar, should make it actually do post/pre increments -fn parse_increment_clause(clause: &[Token]) -> Result { +fn parse_increment_clause(clause: &[Token]) -> Result, String> { let (var, _) = parse_var_target(&clause[2..])?; //An increment clause can never be self referencing since it just VAR++ Ok(match (&clause[0], &clause[1]) { @@ -238,9 +238,9 @@ fn parse_increment_clause(clause: &[Token]) -> Result { // assumed that the final token is a semicolon } -fn parse_set_clause(clause: &[Token]) -> Result, String> { +fn parse_set_clause(clause: &[Token]) -> Result>, String> { // TODO: what do we do about arrays and strings and structs? 
- let mut clauses: Vec = Vec::new(); + let mut clauses: Vec> = Vec::new(); let mut i = 0usize; let (var, len) = parse_var_target(&clause[i..])?; i += len; @@ -287,14 +287,17 @@ fn parse_set_clause(clause: &[Token]) -> Result, String> { Ok(clauses) } -fn parse_drain_copy_clause(clause: &[Token], is_draining: bool) -> Result { +fn parse_drain_copy_clause( + clause: &[Token], + is_draining: bool, +) -> Result, String> { // drain g {i += 1;}; // drain g into j; // copy foo into bar {g += 2; etc;}; // TODO: make a tuple-parsing function and use it here instead of a space seperated list of targets let mut targets = Vec::new(); - let mut block: Vec = Vec::new(); + let mut block: Vec> = Vec::new(); let mut i = 1usize; let condition_start_token = i; @@ -350,7 +353,7 @@ fn parse_drain_copy_clause(clause: &[Token], is_draining: bool) -> Result Result { +fn parse_while_clause(clause: &[Token]) -> Result, String> { // TODO: make this able to accept expressions let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -372,7 +375,7 @@ fn parse_while_clause(clause: &[Token]) -> Result { }) } -fn parse_if_else_clause(clause: &[Token]) -> Result { +fn parse_if_else_clause(clause: &[Token]) -> Result, String> { // skip first token, assumed to start with if let mut i = 1usize; let mut not = false; @@ -397,13 +400,13 @@ fn parse_if_else_clause(clause: &[Token]) -> Result { let condition = Expression::parse(&clause[condition_start_token..i])?; - let block_one: Vec = { + let block_one: Vec> = { let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; i += 2 + block_tokens.len(); parse(block_tokens)? 
}; - let block_two: Option> = if let Some(Token::Else) = &clause.get(i) { + let block_two: Option>> = if let Some(Token::Else) = &clause.get(i) { i += 1; let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; // i += 2 + block_tokens.len(); @@ -426,7 +429,7 @@ fn parse_if_else_clause(clause: &[Token]) -> Result { }) } -fn parse_output_clause(clause: &[Token]) -> Result { +fn parse_output_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let expr_tokens = &clause[i..(clause.len() - 1)]; @@ -440,7 +443,7 @@ fn parse_output_clause(clause: &[Token]) -> Result { Ok(Clause::OutputValue { value: expr }) } -fn parse_input_clause(clause: &[Token]) -> Result { +fn parse_input_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -453,7 +456,7 @@ fn parse_input_clause(clause: &[Token]) -> Result { Ok(Clause::InputVariable { var }) } -fn parse_assert_clause(clause: &[Token]) -> Result { +fn parse_assert_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -485,7 +488,9 @@ fn parse_assert_clause(clause: &[Token]) -> Result { // let g @4,2 = 68; // or // let p @3 = 68; -fn parse_location_specifier(tokens: &[Token]) -> Result<(LocationSpecifier, usize), String> { +fn parse_location_specifier( + tokens: &[Token], +) -> Result<(LocationSpecifier, usize), String> { if tokens.len() == 0 { return Ok((LocationSpecifier::None, 0)); } @@ -541,7 +546,7 @@ fn parse_location_specifier(tokens: &[Token]) -> Result<(LocationSpecifier, usiz } }; - return Ok((LocationSpecifier::Cell((x_offset, y_offset)), i)); + return Ok((LocationSpecifier::Cell(TapeCell2D(x_offset, y_offset)), i)); } Token::Name(_) => { // variable location specifier @@ -557,7 +562,7 @@ fn parse_location_specifier(tokens: &[Token]) -> Result<(LocationSpecifier, usiz Ok((LocationSpecifier::None, 0)) } -fn parse_brainfuck_clause(clause: &[Token]) -> Result { +fn 
parse_brainfuck_clause(clause: &[Token]) -> Result, String> { // bf {++--<><} // bf @3 {++--<><} // bf clobbers var1 var2 {++--<><} @@ -633,7 +638,7 @@ fn parse_brainfuck_clause(clause: &[Token]) -> Result { }) } -fn parse_function_definition_clause(clause: &[Token]) -> Result { +fn parse_function_definition_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; // function name let Token::Name(name) = &clause[i] else { @@ -674,7 +679,7 @@ fn parse_function_definition_clause(clause: &[Token]) -> Result }; let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - let parsed_block: Vec = parse(block_tokens)?; + let parsed_block: Vec> = parse(block_tokens)?; Ok(Clause::DefineFunction { name: name.clone(), @@ -683,7 +688,7 @@ fn parse_function_definition_clause(clause: &[Token]) -> Result }) } -fn parse_function_call_clause(clause: &[Token]) -> Result { +fn parse_function_call_clause(clause: &[Token]) -> Result, String> { let mut i = 0usize; // Okay I didn't know this rust syntax, could have used it all over the place let Token::Name(name) = &clause[i] else { @@ -784,7 +789,7 @@ fn parse_var_target(tokens: &[Token]) -> Result<(VariableTarget, usize), String> fn parse_var_definition( tokens: &[Token], allow_location: bool, -) -> Result<(VariableDefinition, usize), String> { +) -> Result<(VariableDefinition, usize), String> { let mut i = 0usize; let mut var_type = match &tokens[i] { Token::Cell => { @@ -1213,17 +1218,17 @@ impl Sign { } #[derive(Debug, Clone)] -pub enum Clause { +pub enum Clause { DeclareVariable { - var: VariableDefinition, + var: VariableDefinition, }, DefineVariable { - var: VariableDefinition, + var: VariableDefinition, value: Expression, }, DefineStruct { name: String, - fields: Vec, + fields: Vec>, }, AddToVariable { var: VariableTarget, @@ -1244,12 +1249,12 @@ pub enum Clause { CopyLoop { source: Expression, targets: Vec, - block: Vec, + block: Vec>, is_draining: bool, }, WhileLoop { var: VariableTarget, - block: Vec, + block: 
Vec>, }, OutputValue { value: Expression, @@ -1259,8 +1264,9 @@ pub enum Clause { }, DefineFunction { name: String, - arguments: Vec, - block: Vec, + // TODO: fix the type here, as function definitions don't actually need location specifiers and therefore don't need a tape cell type + arguments: Vec>, + block: Vec>, }, CallFunction { function_name: String, @@ -1268,20 +1274,20 @@ pub enum Clause { }, IfElse { condition: Expression, - if_block: Option>, - else_block: Option>, + if_block: Option>>, + else_block: Option>>, }, - Block(Vec), + Block(Vec>), InlineBrainfuck { - location_specifier: LocationSpecifier, + location_specifier: LocationSpecifier, clobbered_variables: Vec, - operations: Vec, + operations: Vec>, }, } // extended brainfuck opcodes to include mastermind code blocks #[derive(Debug, Clone)] -pub enum ExtendedOpcode { +pub enum ExtendedOpcode { Add, Subtract, Right, @@ -1290,7 +1296,7 @@ pub enum ExtendedOpcode { CloseLoop, Output, Input, - Block(Vec), + Block(Vec>), Up, Down, } @@ -1305,22 +1311,22 @@ pub enum VariableTypeReference { } #[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub enum LocationSpecifier { +pub enum LocationSpecifier { None, Cell(TapeCell), Variable(VariableTarget), } -impl LocationSpecifier { +impl LocationSpecifier { fn is_none(&self) -> bool { matches!(self, LocationSpecifier::None) } } #[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct VariableDefinition { +pub struct VariableDefinition { pub name: String, pub var_type: VariableTypeReference, - pub location_specifier: LocationSpecifier, + pub location_specifier: LocationSpecifier, // Infinite {name: String, pattern: ???}, } @@ -1342,8 +1348,8 @@ pub struct VariableTarget { pub is_spread: bool, } impl VariableTarget { - /// converts a definition to a target for use with definition clauses (as opposed to declarations) - pub fn from_definition(var_def: &VariableDefinition) -> Self { + /// convert a definition to a target for use with definition clauses (as opposed to 
declarations) + pub fn from_definition(var_def: &VariableDefinition) -> Self { VariableTarget { name: var_def.name.clone(), subfields: None, @@ -1355,18 +1361,18 @@ impl VariableTarget { impl Display for VariableTypeReference { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match &self { - VariableTypeReference::Cell => f.write_str(&format!("cell")), + VariableTypeReference::Cell => f.write_str("cell"), VariableTypeReference::Struct(struct_name) => { - f.write_str(&format!("struct {struct_name}")) + f.write_fmt(format_args!("struct {struct_name}")) } VariableTypeReference::Array(element_type, len) => { - f.write_str(&format!("{element_type}[{len}]")) + f.write_fmt(format_args!("{element_type}[{len}]")) } } } } -impl Display for VariableDefinition { +impl Display for VariableDefinition { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str(&format!("{} {}", self.var_type, self.name))?; match &self.location_specifier { @@ -1380,12 +1386,12 @@ impl Display for VariableDefinition { } } -impl Display for LocationSpecifier { +impl Display for LocationSpecifier { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str("@")?; match self { - LocationSpecifier::Cell(cell) => f.write_str(&format!("{:?}", cell))?, - LocationSpecifier::Variable(var) => f.write_str(&format!("{}", var))?, + LocationSpecifier::Cell(cell) => f.write_str(&format!("{cell}"))?, + LocationSpecifier::Variable(var) => f.write_str(&format!("{var}"))?, LocationSpecifier::None => (), } diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index 09e63d1..d4285e9 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -4,9 +4,9 @@ #[cfg(test)] pub mod black_box_tests { use crate::{ + backend::{BrainfuckOpcodes, Opcode}, brainfuck::{bvm_tests::run_code, BVMConfig}, - builder::{BrainfuckOpcodes, Builder, Opcode}, - compiler::Compiler, + misc::MastermindContext, parser::parse, tokeniser::{tokenise, Token}, MastermindConfig, 
@@ -90,19 +90,13 @@ pub mod black_box_tests { const TESTING_BVM_MAX_STEPS: usize = 100_000_000; fn compile_and_run(program: String, input: String) -> Result { - // println!("{program}"); - // compile mastermind + let ctx = MastermindContext { config: &OPT_NONE }; let tokens: Vec = tokenise(&program)?; - // println!("{tokens:#?}"); let clauses = parse(&tokens)?; - // println!("{clauses:#?}"); - let instructions = Compiler { config: &OPT_NONE } - .compile(&clauses, None)? - .finalise_instructions(false); - // println!("{instructions:#?}"); - let bf_program = Builder { config: &OPT_NONE }.build(instructions, false)?; + let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); + let bf_program = ctx.ir_to_bf(instructions, false)?; let bfs = bf_program.to_string(); - // println!("{}", bfs); + // run generated brainfuck with input Ok(run_code( BVM_CONFIG_1D, @@ -116,23 +110,13 @@ pub mod black_box_tests { program: String, config: Option<&MastermindConfig>, ) -> Result, String> { - // println!("{program}"); - // compile mastermind + let ctx = MastermindContext { + config: config.unwrap_or(&OPT_NONE), + }; let tokens: Vec = tokenise(&program)?; - // println!("{tokens:#?}"); let clauses = parse(&tokens)?; - // println!("{clauses:#?}"); - let instructions = Compiler { - config: config.unwrap_or(&OPT_NONE), - } - .compile(&clauses, None)? 
- .finalise_instructions(false); - // println!("{instructions:#?}"); - let bf_code = Builder { - config: config.unwrap_or(&OPT_NONE), - } - .build(instructions, false)?; - // println!("{}", bfs); + let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); + let bf_code = ctx.ir_to_bf(instructions, false)?; Ok(bf_code) } From 279a8b17a2f86387bd177bada370d5b217aa66fe Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Sun, 12 Oct 2025 13:09:19 +1100 Subject: [PATCH 06/56] Remove None token hack --- compiler/src/frontend.rs | 5 +- compiler/src/misc.rs | 17 +++++ compiler/src/tests.rs | 126 ++++++++++++++++++++++++-------------- compiler/src/tokeniser.rs | 72 +++------------------- 4 files changed, 107 insertions(+), 113 deletions(-) diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index e106f6c..d0d38ae 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -10,7 +10,6 @@ use crate::{ Clause, Expression, ExtendedOpcode, LocationSpecifier, Reference, VariableDefinition, VariableTarget, VariableTargetReferenceChain, VariableTypeReference, }, - MastermindConfig, }; // memory stuff is all WIP and some comments may be incorrect @@ -502,8 +501,8 @@ impl MastermindContext<'_> { .build_ir(false); // compile without cleaning up top level variables, this is the brainfuck programmer's responsibility // it is also the brainfuck programmer's responsibility to return to the start position - let built_code = ctx.ir_to_bf(instructions, true)?; - expanded_bf.extend(built_code); + let bf_code = ctx.ir_to_bf(instructions, true)?; + expanded_bf.extend(bf_code); } ExtendedOpcode::Add => expanded_bf.push(Opcode::Add), ExtendedOpcode::Subtract => expanded_bf.push(Opcode::Subtract), diff --git a/compiler/src/misc.rs b/compiler/src/misc.rs index bf1430c..f592be5 100644 --- a/compiler/src/misc.rs +++ b/compiler/src/misc.rs @@ -27,6 +27,23 @@ pub struct MastermindConfig { pub enable_2d_grid: bool, } +impl Default for MastermindConfig { + fn 
default() -> MastermindConfig { + MastermindConfig { + optimise_generated_code: false, + optimise_generated_all_permutations: false, + optimise_cell_clearing: false, + optimise_unreachable_loops: false, + optimise_variable_usage: false, + optimise_memory_allocation: false, + optimise_constants: false, + optimise_empty_blocks: false, + memory_allocation_method: 0, + enable_2d_grid: false, + } + } +} + impl MastermindConfig { pub fn new(optimise_bitmask: usize) -> MastermindConfig { MastermindConfig { diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index d4285e9..aff4c89 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -153,6 +153,42 @@ pub mod black_box_tests { assert_eq!(desired_output, output) } + #[test] + fn empty_program_1() { + let program = String::from(""); + let input = String::from(""); + let desired_output = String::from(""); + let output = compile_and_run(program, input).expect(""); + assert_eq!(desired_output, output) + } + + #[test] + fn empty_program_2() { + let program = String::from(";"); + let input = String::from(""); + let desired_output = String::from(""); + let output = compile_and_run(program, input).expect(""); + assert_eq!(desired_output, output) + } + + #[test] + fn empty_program_3() { + let program = String::from(";;;;;;"); + let input = String::from(""); + let desired_output = String::from(""); + let output = compile_and_run(program, input).expect(""); + assert_eq!(desired_output, output) + } + + #[test] + fn empty_program_4() { + let program = String::from(";;{;{;};};;;"); + let input = String::from(""); + let desired_output = String::from(""); + let output = compile_and_run(program, input).expect(""); + assert_eq!(desired_output, output) + } + #[test] fn hello_1() { let program = String::from( @@ -198,7 +234,7 @@ output 10; #[test] fn hello_3() { let program = String::from( - r#"; + r#" output 'h' ;;; // comment cell[5] EEL = "ello\n"; @@ -258,7 +294,7 @@ output ['o', '.', '\n']; #[test] fn expressions_1() { 
let program = String::from( - r#"; + r#" output '@' + 256 + 1 + false + true + 'e' - '@'; "#, ); @@ -272,7 +308,7 @@ output '@' + 256 + 1 + false + true + 'e' - '@'; #[test] fn expressions_2() { let program = String::from( - r#"; + r#" cell p = 9 - (true + true -(-7)); if not p { output "Hi friend!\n"; @@ -297,7 +333,7 @@ if q { #[test] fn expressions_3() { let program = String::from( - r#"; + r#" if 56 - 7 { output 'A'; } else { @@ -329,7 +365,7 @@ if not_a - 'a' { #[test] fn expressions_4() { let program = String::from( - r#"; + r#" cell x = 5; cell A = 'A'; @@ -351,7 +387,7 @@ output A; #[test] fn assignments_1() { let program = String::from( - r#"; + r#" cell x = 5; output '0' + x; x += 1; @@ -368,7 +404,7 @@ output '0' + x; #[test] fn assignments_2() { let program = String::from( - r#"; + r#" cell x = 5; output '0' + x; x = x + 1; @@ -384,7 +420,7 @@ output '0' + x; #[test] fn assignments_3() { let program = String::from( - r#"; + r#" cell x = 5; output '0' + x; x += 1 + x; @@ -401,7 +437,7 @@ output '0' + x; #[test] fn assignments_4() { let program = String::from( - r#"; + r#" cell x = 2; output '0' + x; x = x + x + x; @@ -418,7 +454,7 @@ output '0' + x; #[test] fn assignments_5() { let program = String::from( - r#"; + r#" cell x = 2; x = (2 + 3) - ((x + 4) + 1) + 4 - (12) + (3 + 10); output '0' + x; @@ -434,7 +470,7 @@ output '0' + x; #[test] fn assignments_6() { let program = String::from( - r#"; + r#" cell[2] x = [4, 5]; x[0] = x[0] + 4; x[1] = x[1] - 3; @@ -454,7 +490,7 @@ output *x; #[test] fn assignments_7() { let program = String::from( - r#"; + r#" cell[2] x = [1, 2]; x[0] = x[1] + 5; // 7 x[1] = x[0] + x[1]; // 9 @@ -474,7 +510,7 @@ output *x; #[test] fn assignments_8() { let program = String::from( - r#"; + r#" cell x = 128; output x - 2; "#, @@ -489,7 +525,7 @@ output x - 2; #[test] fn assignments_8a() { let program = String::from( - r#"; + r#" cell x = 127; cell y = 64; x += y + y; @@ -506,7 +542,7 @@ output x + 'f' + 1; #[test] fn 
assignments_8b() { let program = String::from( - r#"; + r#" cell x = 128; cell y = 64; x += y + y; @@ -523,7 +559,7 @@ output x + 'f'; #[test] fn assignments_9() -> Result<(), String> { let program = String::from( - r#"; + r#" cell x = 128; x += 128; output x + 'f'; @@ -542,7 +578,7 @@ output x + 'f'; #[test] fn assignments_9a() -> Result<(), String> { let program = String::from( - r#"; + r#" cell x = 126; x += 2; x += 128; @@ -1370,7 +1406,7 @@ output g[2][3]; #[test] fn structs_1() { let program = String::from( - r#"; + r#" struct AA { cell green; cell yellow; @@ -1396,7 +1432,7 @@ output '0' + a.yellow; #[test] fn structs_2() { let program = String::from( - r#"; + r#" struct AA { cell green; cell yellow; @@ -1423,7 +1459,7 @@ output '0' + a.yellow; #[test] fn structs_3() { let program = String::from( - r#"; + r#" struct AA { cell green; cell yellow; @@ -1452,7 +1488,7 @@ output a.green; #[test] fn structs_3a() { let program = String::from( - r#"; + r#" struct AA a; fn input_AA(struct AA bbb) { @@ -1485,7 +1521,7 @@ struct AA { #[test] fn structs_3b() { let program = String::from( - r#"; + r#" struct AA a; fn input_AA(struct AA bbb) { @@ -1523,7 +1559,7 @@ struct AA { #[test] fn structs_4a() { let program = String::from( - r#"; + r#" struct AA a; input a.green; input a.yellow; @@ -1557,7 +1593,7 @@ output '\n'; #[test] fn structs_4b() { let program = String::from( - r#"; + r#" struct AA a; input a.green; input a.yellow; @@ -1585,7 +1621,7 @@ output '\n'; #[test] fn structs_4c() { let program = String::from( - r#"; + r#" struct AA a; input a.green; input a.yellow; @@ -1624,7 +1660,7 @@ output '\n'; #[should_panic] fn structs_4d() { let program = String::from( - r#"; + r#" struct AA a; input *a.reds; @@ -1647,7 +1683,7 @@ output '\n'; #[test] fn structs_5() { let program = String::from( - r#"; + r#" struct AA { cell green; } @@ -1693,7 +1729,7 @@ struct AAA { #[test] fn structs_6() { let program = String::from( - r#"; + r#" struct AA { cell green; } @@ -1730,7 
+1766,7 @@ output as[1].green; #[test] fn structs_7() { let program = String::from( - r#"; + r#" struct BB { cell green; } @@ -1778,7 +1814,7 @@ output as[1].bbb[2].green; #[test] fn structs_7a() { let program = String::from( - r#"; + r#" struct BB { cell green @2; } @@ -1826,7 +1862,7 @@ output as[1].bbb[2].green; #[test] fn structs_bf_1() { let program = String::from( - r#"; + r#" struct Frame { cell marker @3; cell value @0; @@ -1864,7 +1900,7 @@ bf @2 { #[should_panic] fn structs_bf_1a() { let program = String::from( - r#"; + r#" struct Frame { cell marker @2; cell value @0; @@ -1887,7 +1923,7 @@ struct Frame f; #[should_panic] fn structs_bf_1b() { let program = String::from( - r#"; + r#" struct Frame { cell marker @-2; cell value @0; @@ -1908,7 +1944,7 @@ struct Frame f; #[should_panic] fn structs_bf_1c() { let program = String::from( - r#"; + r#" struct G { cell a @1; cell b @1; @@ -1932,7 +1968,7 @@ output g.b; #[test] fn structs_bf_2() { let program = String::from( - r#"; + r#" struct Green { // no @0 cell cell blue @1; @@ -1957,7 +1993,7 @@ bf @4 { #[test] fn sizeof_0() { let program = String::from( - r#"; + r#" output '0' + sizeof(cell); "#, ); @@ -1972,7 +2008,7 @@ output '0' + sizeof(cell); #[test] fn sizeof_0a() { let program = String::from( - r#"; + r#" output '0' + sizeof(cell[5]); "#, ); @@ -1987,7 +2023,7 @@ output '0' + sizeof(cell[5]); #[test] fn sizeof_0b() { let program = String::from( - r#"; + r#" cell a; cell b[4]; output '0' + sizeof(a); @@ -2006,7 +2042,7 @@ output '0' + sizeof(b[2]); #[test] fn sizeof_1() { let program = String::from( - r#"; + r#" struct Green { cell blue; } @@ -2025,7 +2061,7 @@ output '0' + s; #[test] fn sizeof_1a() { let program = String::from( - r#"; + r#" struct Green { cell blue; } @@ -2044,7 +2080,7 @@ output '0' + s; #[test] fn sizeof_1b() { let program = String::from( - r#"; + r#" struct Green { cell blue; } @@ -2063,7 +2099,7 @@ output '0' + s; #[test] fn sizeof_2() { let program = String::from( - r#"; + r#" 
struct Green { cell blue; cell red; @@ -2083,7 +2119,7 @@ output '0' + sizeof(g); #[test] fn sizeof_3() { let program = String::from( - r#"; + r#" struct Green { cell blue; cell[5] red; @@ -2107,7 +2143,7 @@ output '0' + sizeof(g[0].red); #[test] fn sizeof_4() { let program = String::from( - r#"; + r#" struct Green { cell blue @2; } @@ -2128,7 +2164,7 @@ output '0' + sizeof(g[2].blue) #[test] fn sizeof_5() { let program = String::from( - r#"; + r#" struct Blue { cell[2] blues; } diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index 58bddc7..04413c9 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -88,7 +88,6 @@ pub fn tokenise(source: &String) -> Result, String> { "Not enough characters in string literal token, \ this should never occur. {raw:#?}" ); - let oiasdhfidush = &raw[1..(raw.len() - 1)]; tokens.push(Token::String(tokenise_raw_string_literal( &raw[1..(raw.len() - 1)], )?)); @@ -119,8 +118,6 @@ this should never occur. {raw:#?}" Ok(tokens .into_iter() .filter(|t| !matches!(t, Token::None)) - // stick a None token on the end to fix some weird parsing errors (seems silly but why not?) 
- .chain([Token::None]) .collect()) } @@ -253,96 +250,45 @@ mod tokeniser_tests { Token::Character('b'), Token::Character('c'), Token::Character(' '), - // TODO: remove this None, fix the code that needs it - Token::None, ], ); } #[test] fn character_literals_2() { - _tokenisation_test( - r#"'\n'"#, - &[ - Token::Character('\n'), - // TODO: remove this None, fix the code that needs it - Token::None, - ], - ); + _tokenisation_test(r#"'\n'"#, &[Token::Character('\n')]); } #[test] fn character_literals_3() { - _tokenisation_test( - r#"'"'"#, - &[ - Token::Character('"'), - // TODO: remove this None, fix the code that needs it - Token::None, - ], - ); + _tokenisation_test(r#"'"'"#, &[Token::Character('"')]); } #[test] fn character_literals_4() { - _tokenisation_test( - r#"'\''"#, - &[ - Token::Character('\''), - // TODO: remove this None, fix the code that needs it - Token::None, - ], - ); + _tokenisation_test(r#"'\''"#, &[Token::Character('\'')]); } #[test] #[should_panic] fn character_literals_5() { - _tokenisation_test( - r#"'\'"#, - &[ - Token::Character('\\'), - // TODO: remove this None, fix the code that needs it - Token::None, - ], - ); + _tokenisation_test(r#"'\'"#, &[Token::Character('\\')]); } #[test] #[should_panic] fn character_literals_6() { - _tokenisation_test( - r#"'aa'"#, - &[ - Token::String(String::from("aa")), - // TODO: remove this None, fix the code that needs it - Token::None, - ], - ); + _tokenisation_test(r#"'aa'"#, &[Token::String(String::from("aa"))]); } #[test] fn string_literals_1() { - _tokenisation_test( - "\"hello\"", - &[ - Token::String(String::from("hello")), - // TODO: remove this None, fix the code that needs it - Token::None, - ], - ); + _tokenisation_test("\"hello\"", &[Token::String(String::from("hello"))]); } #[test] fn string_literals_2() { - _tokenisation_test( - r#""""#, - &[ - Token::String(String::from("")), - // TODO: remove this None, fix the code that needs it - Token::None, - ], - ); + _tokenisation_test(r#""""#, 
&[Token::String(String::from(""))]); } #[test] @@ -352,8 +298,6 @@ mod tokeniser_tests { &[ Token::String(String::from("")), Token::String(String::from("")), - // TODO: remove this None, fix the code that needs it - Token::None, ], ); } @@ -365,8 +309,6 @@ mod tokeniser_tests { &[ Token::String(String::from("\"")), Token::String(String::from(" ")), - // TODO: remove this None, fix the code that needs it - Token::None, ], ); } From 844c1864e9e821d075c8b58d15cbe32ac194afd5 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 13 Oct 2025 14:27:53 +1100 Subject: [PATCH 07/56] Rename Opcode type to Opcode2D and tweak tape origin logic --- compiler/src/backend.rs | 107 ++++++++++++++++------------ compiler/src/brainfuck_optimiser.rs | 74 +++++++++---------- compiler/src/constants_optimiser.rs | 6 +- compiler/src/frontend.rs | 28 ++++---- compiler/src/lib.rs | 2 +- compiler/src/main.rs | 2 +- compiler/src/tests.rs | 8 +-- compiler/src/tokeniser.rs | 2 + 8 files changed, 123 insertions(+), 106 deletions(-) diff --git a/compiler/src/backend.rs b/compiler/src/backend.rs index 58fb4e1..cb6e341 100644 --- a/compiler/src/backend.rs +++ b/compiler/src/backend.rs @@ -35,8 +35,8 @@ impl MastermindContext<'_> { pub fn ir_to_bf( &self, instructions: Vec>, - return_to_origin: bool, - ) -> Result, String> { + return_to_cell: Option, + ) -> Result, String> { let mut allocator = CellAllocator::new(); let mut alloc_map: HashMap< MemoryId, @@ -164,7 +164,7 @@ outside of loop it was allocated" // skip the loop if the optimisations are turned on and we know the value is 0 if open { ops.move_to_cell(cell); - ops.push(Opcode::OpenLoop); + ops.push(Opcode2D::OpenLoop); loop_stack.push(cell); current_loop_depth += 1; } @@ -194,7 +194,7 @@ outside of loop it was allocated" current_loop_depth -= 1; ops.move_to_cell(cell); - ops.push(Opcode::CloseLoop); + ops.push(Opcode2D::CloseLoop); // if a loop finishes on a cell then it is guaranteed to be 0 based on brainfuck itself // I did encounter 
issues with nested loops here, interesting @@ -266,7 +266,7 @@ outside of loop it was allocated" let known_value = &mut known_values[mem_idx]; ops.move_to_cell(cell); - ops.push(Opcode::Input); + ops.push(Opcode2D::Input); // no way to know at compile time what the input to the program will be *known_value = None; } @@ -299,11 +299,11 @@ outside of loop it was allocated" let imm = *known_value as i8; if imm > 0 { for _ in 0..imm { - ops.push(Opcode::Subtract); + ops.push(Opcode2D::Subtract); } } else if imm < 0 { for _ in 0..-imm { - ops.push(Opcode::Add); + ops.push(Opcode2D::Add); } } clear = false; @@ -311,7 +311,7 @@ outside of loop it was allocated" } if clear { - ops.push(Opcode::Clear); + ops.push(Opcode2D::Clear); } if *alloc_loop_depth == current_loop_depth { @@ -334,7 +334,7 @@ outside of loop it was allocated" let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); ops.move_to_cell(cell); - ops.push(Opcode::Output); + ops.push(Opcode2D::Output); } Instruction::InsertBrainfuckAtCell(operations, location_specifier) => { // move to the correct cell, based on the location specifier @@ -364,8 +364,8 @@ outside of loop it was allocated" } // this is used in embedded brainfuck contexts to preserve head position - if return_to_origin { - ops.move_to_cell(TapeCell2D(0, 0)); + if let Some(origin_cell) = return_to_cell { + ops.move_to_cell(origin_cell); } Ok(ops.opcodes) @@ -591,8 +591,23 @@ impl CellAllocator { } } +// #[derive(Clone, Copy, Debug)] +// pub enum Opcode { +// Add, +// Subtract, +// Right, +// Left, +// OpenLoop, +// CloseLoop, +// Output, +// Input, +// Clear, +// Up, +// Down, +// } + #[derive(Clone, Copy, Debug)] -pub enum Opcode { +pub enum Opcode2D { Add, Subtract, Right, @@ -607,7 +622,7 @@ pub enum Opcode { } pub struct BFBuilder { - opcodes: Vec, + opcodes: Vec, pub head_pos: TapeCell2D, } @@ -616,47 +631,47 @@ pub trait BrainfuckOpcodes { fn from_str(s: &str) -> Self; } -impl BrainfuckOpcodes for Vec { +impl BrainfuckOpcodes for 
Vec { fn to_string(self) -> String { let mut s = String::new(); self.into_iter().for_each(|o| { s.push_str(match o { - Opcode::Add => "+", - Opcode::Subtract => "-", - Opcode::Right => ">", - Opcode::Left => "<", - Opcode::OpenLoop => "[", - Opcode::CloseLoop => "]", - Opcode::Output => ".", - Opcode::Input => ",", - Opcode::Clear => "[-]", - Opcode::Up => "^", - Opcode::Down => "v", + Opcode2D::Add => "+", + Opcode2D::Subtract => "-", + Opcode2D::Right => ">", + Opcode2D::Left => "<", + Opcode2D::OpenLoop => "[", + Opcode2D::CloseLoop => "]", + Opcode2D::Output => ".", + Opcode2D::Input => ",", + Opcode2D::Clear => "[-]", + Opcode2D::Up => "^", + Opcode2D::Down => "v", }) }); s } - fn from_str(s: &str) -> Self { + fn from_str(s: &str) -> Vec { let mut ops = Vec::new(); let mut i = 0; while i < s.len() { let substr = &s[i..]; if substr.starts_with("[-]") { - ops.push(Opcode::Clear); + ops.push(Opcode2D::Clear); i += 3; } else { match substr.chars().next().unwrap() { - '+' => ops.push(Opcode::Add), - '-' => ops.push(Opcode::Subtract), - '>' => ops.push(Opcode::Right), - '<' => ops.push(Opcode::Left), - '[' => ops.push(Opcode::OpenLoop), - ']' => ops.push(Opcode::CloseLoop), - '.' => ops.push(Opcode::Output), - ',' => ops.push(Opcode::Input), - '^' => ops.push(Opcode::Up), - 'v' => ops.push(Opcode::Down), + '+' => ops.push(Opcode2D::Add), + '-' => ops.push(Opcode2D::Subtract), + '>' => ops.push(Opcode2D::Right), + '<' => ops.push(Opcode2D::Left), + '[' => ops.push(Opcode2D::OpenLoop), + ']' => ops.push(Opcode2D::CloseLoop), + '.' 
=> ops.push(Opcode2D::Output), + ',' => ops.push(Opcode2D::Input), + '^' => ops.push(Opcode2D::Up), + 'v' => ops.push(Opcode2D::Down), _ => (), // could put a little special opcode in for other characters } i += 1; @@ -674,7 +689,7 @@ impl BrainfuckOpcodes for BFBuilder { fn from_str(s: &str) -> Self { BFBuilder { - opcodes: BrainfuckOpcodes::from_str(s), + opcodes: Vec::from_str(s), head_pos: TapeCell2D(0, 0), } } @@ -690,12 +705,12 @@ impl BFBuilder { pub fn len(&self) -> usize { self.opcodes.len() } - pub fn push(&mut self, op: Opcode) { + pub fn push(&mut self, op: Opcode2D) { self.opcodes.push(op); } pub fn extend(&mut self, ops: T) where - T: IntoIterator, + T: IntoIterator, { self.opcodes.extend(ops); } @@ -707,23 +722,23 @@ impl BFBuilder { //Move x level if x_pos < x { for _ in x_pos..x { - self.opcodes.push(Opcode::Right); + self.opcodes.push(Opcode2D::Right); } } else if x < x_pos { // theoretically equivalent to cell..head_pos? for _ in ((x + 1)..=x_pos).rev() { - self.opcodes.push(Opcode::Left); + self.opcodes.push(Opcode2D::Left); } } //Move y level if y_pos < y { for _ in y_pos..y { - self.opcodes.push(Opcode::Up); + self.opcodes.push(Opcode2D::Up); } } else if y < y_pos { // theoretically equivalent to cell..head_pos? 
for _ in ((y + 1)..=y_pos).rev() { - self.opcodes.push(Opcode::Down); + self.opcodes.push(Opcode2D::Down); } } self.head_pos = cell; @@ -732,12 +747,12 @@ impl BFBuilder { pub fn add_to_current_cell(&mut self, imm: i8) { if imm > 0 { for _ in 0..imm { - self.opcodes.push(Opcode::Add); + self.opcodes.push(Opcode2D::Add); } } else if imm < 0 { // needs to be i32 because -(-128) = -128 in i8-land for _ in 0..-(imm as i32) { - self.opcodes.push(Opcode::Subtract); + self.opcodes.push(Opcode2D::Subtract); } } } diff --git a/compiler/src/brainfuck_optimiser.rs b/compiler/src/brainfuck_optimiser.rs index e623e21..7350945 100644 --- a/compiler/src/brainfuck_optimiser.rs +++ b/compiler/src/brainfuck_optimiser.rs @@ -1,4 +1,4 @@ -use crate::backend::Opcode; +use crate::backend::{Opcode2D, TapeCell2D}; use itertools::Itertools; use std::{collections::HashMap, num::Wrapping}; @@ -7,7 +7,7 @@ use std::{collections::HashMap, num::Wrapping}; // simple naive brainfuck optimisations // TODO: factor in [-] into optimisations (doing) -pub fn optimise(program: Vec, exhaustive: bool) -> Vec { +pub fn optimise(program: Vec, exhaustive: bool) -> Vec { let mut output = Vec::new(); // get stretch of characters to optimise (+-<>) @@ -16,16 +16,16 @@ pub fn optimise(program: Vec, exhaustive: bool) -> Vec { while i < program.len() { let op = program[i]; match op { - Opcode::Add - | Opcode::Subtract - | Opcode::Right - | Opcode::Left - | Opcode::Clear - | Opcode::Up - | Opcode::Down => { + Opcode2D::Add + | Opcode2D::Subtract + | Opcode2D::Right + | Opcode2D::Left + | Opcode2D::Clear + | Opcode2D::Up + | Opcode2D::Down => { subset.push(op); } - Opcode::OpenLoop | Opcode::CloseLoop | Opcode::Input | Opcode::Output => { + Opcode2D::OpenLoop | Opcode2D::CloseLoop | Opcode2D::Input | Opcode2D::Output => { // optimise subset and push let optimised_subset = optimise_subset(subset, exhaustive); output.extend(optimised_subset); @@ -41,57 +41,57 @@ pub fn optimise(program: Vec, exhaustive: bool) -> Vec 
{ } fn move_position( - mut program: Vec, - old_position: &(i32, i32), - new_position: &(i32, i32), -) -> Vec { + mut program: Vec, + old_position: &TapeCell2D, + new_position: &TapeCell2D, +) -> Vec { if old_position != new_position { if old_position.0 < new_position.0 { for _ in 0..(new_position.0 - old_position.0) { - program.push(Opcode::Right); + program.push(Opcode2D::Right); } } else { for _ in 0..(old_position.0 - new_position.0) { - program.push(Opcode::Left); + program.push(Opcode2D::Left); } } if old_position.1 < new_position.1 { for _ in 0..(new_position.1 - old_position.1) { - program.push(Opcode::Up); + program.push(Opcode2D::Up); } } else { for _ in 0..(old_position.1 - new_position.1) { - program.push(Opcode::Down); + program.push(Opcode2D::Down); } } } program } -fn optimise_subset(run: Vec, exhaustive: bool) -> Vec { +fn optimise_subset(run: Vec, exhaustive: bool) -> Vec { #[derive(Clone)] enum Change { Add(Wrapping), Set(Wrapping), } - let mut tape: HashMap<(i32, i32), Change> = HashMap::new(); - let start = (0, 0); - let mut head = (0, 0); + let mut tape: HashMap = HashMap::new(); + let start = TapeCell2D(0, 0); + let mut head = TapeCell2D(0, 0); let mut i = 0; //Generate a map of cells we change and how we plan to change them while i < run.len() { let op = run[i]; match op { - Opcode::Clear => { + Opcode2D::Clear => { tape.insert(head, Change::Set(Wrapping(0i8))); } - Opcode::Subtract | Opcode::Add => { + Opcode2D::Subtract | Opcode2D::Add => { let mut change = tape.remove(&head).unwrap_or(Change::Add(Wrapping(0i8))); let (Change::Add(val) | Change::Set(val)) = &mut change; *val += match op { - Opcode::Add => 1, - Opcode::Subtract => -1, + Opcode2D::Add => 1, + Opcode2D::Subtract => -1, _ => 0, }; @@ -106,16 +106,16 @@ fn optimise_subset(run: Vec, exhaustive: bool) -> Vec { } } } - Opcode::Right => { + Opcode2D::Right => { head.0 += 1; } - Opcode::Left => { + Opcode2D::Left => { head.0 -= 1; } - Opcode::Up => { + Opcode2D::Up => { head.1 += 1; 
} - Opcode::Down => { + Opcode2D::Down => { head.1 -= 1; } _ => (), @@ -155,14 +155,14 @@ fn optimise_subset(run: Vec, exhaustive: bool) -> Vec { output = move_position(output, &position, cell); position = *cell; if let Change::Set(_) = change { - output.push(Opcode::Clear); + output.push(Opcode2D::Clear); } let (Change::Add(v) | Change::Set(v)) = change; let v = v.0; for _ in 0..(v as i32).abs() { output.push(match v == -128 || v > 0 { - true => Opcode::Add, - false => Opcode::Subtract, + true => Opcode2D::Add, + false => Opcode2D::Subtract, }); } } @@ -174,7 +174,7 @@ fn optimise_subset(run: Vec, exhaustive: bool) -> Vec { for _ in 0..tape.len() { if !tape.is_empty() { let mut min_distance = i32::MAX; - let mut next_position = (0, 0); + let mut next_position = TapeCell2D(0, 0); for (cell, _value) in tape.iter() { if (cell.0 - position.0).abs() + (cell.1 - position.1).abs() < min_distance { min_distance = (cell.0 - position.0).abs() + (cell.1 - position.1).abs(); @@ -187,14 +187,14 @@ fn optimise_subset(run: Vec, exhaustive: bool) -> Vec { //Now Update the output with correct opcodes let change = tape.remove(&next_position).unwrap(); if let Change::Set(_) = change { - output.push(Opcode::Clear); + output.push(Opcode2D::Clear); } let (Change::Add(v) | Change::Set(v)) = change; let v = v.0; for _ in 0..(v as i32).abs() { output.push(match v == -128 || v > 0 { - true => Opcode::Add, - false => Opcode::Subtract, + true => Opcode2D::Add, + false => Opcode2D::Subtract, }); } } diff --git a/compiler/src/constants_optimiser.rs b/compiler/src/constants_optimiser.rs index aa67756..fc43caf 100644 --- a/compiler/src/constants_optimiser.rs +++ b/compiler/src/constants_optimiser.rs @@ -1,5 +1,5 @@ // TODO: make unit tests for this -use crate::backend::{BFBuilder, Opcode, TapeCell2D}; +use crate::backend::{BFBuilder, Opcode2D, TapeCell2D}; // basically, most ascii characters are large numbers, which are more efficient to calculate with multiplication than with a bunch of + or - 
// an optimising brainfuck runtime will prefer a long string of +++++ or ----- however the goal of mastermind is to be used for code golf, which is not about speed @@ -76,7 +76,7 @@ pub fn calculate_optimal_addition( ops.move_to_cell(temp_cell); ops.add_to_current_cell(a as i8); - ops.push(Opcode::OpenLoop); + ops.push(Opcode2D::OpenLoop); ops.add_to_current_cell(-1); ops.move_to_cell(target_cell); if value < 0 { @@ -85,7 +85,7 @@ pub fn calculate_optimal_addition( ops.add_to_current_cell(b as i8); } ops.move_to_cell(temp_cell); - ops.push(Opcode::CloseLoop); + ops.push(Opcode2D::CloseLoop); ops.move_to_cell(target_cell); if value < 0 { ops.add_to_current_cell(-(c as i8)); diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index d0d38ae..c6b94a7 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -3,7 +3,7 @@ use std::{collections::HashMap, iter::zip}; use crate::{ - backend::{Opcode, TapeCell2D}, + backend::{Opcode2D, TapeCell2D}, macros::macros::{r_assert, r_panic}, misc::MastermindContext, parser::{ @@ -485,7 +485,7 @@ impl MastermindContext<'_> { operations, } => { // loop through the opcodes - let mut expanded_bf: Vec = Vec::new(); + let mut expanded_bf: Vec = Vec::new(); for op in operations { match op { ExtendedOpcode::Block(mm_clauses) => { @@ -501,19 +501,19 @@ impl MastermindContext<'_> { .build_ir(false); // compile without cleaning up top level variables, this is the brainfuck programmer's responsibility // it is also the brainfuck programmer's responsibility to return to the start position - let bf_code = ctx.ir_to_bf(instructions, true)?; + let bf_code = ctx.ir_to_bf(instructions, Some(TapeCell2D(0, 0)))?; expanded_bf.extend(bf_code); } - ExtendedOpcode::Add => expanded_bf.push(Opcode::Add), - ExtendedOpcode::Subtract => expanded_bf.push(Opcode::Subtract), - ExtendedOpcode::Right => expanded_bf.push(Opcode::Right), - ExtendedOpcode::Left => expanded_bf.push(Opcode::Left), - ExtendedOpcode::OpenLoop => 
expanded_bf.push(Opcode::OpenLoop), - ExtendedOpcode::CloseLoop => expanded_bf.push(Opcode::CloseLoop), - ExtendedOpcode::Output => expanded_bf.push(Opcode::Output), - ExtendedOpcode::Input => expanded_bf.push(Opcode::Input), - ExtendedOpcode::Up => expanded_bf.push(Opcode::Up), - ExtendedOpcode::Down => expanded_bf.push(Opcode::Down), + ExtendedOpcode::Add => expanded_bf.push(Opcode2D::Add), + ExtendedOpcode::Subtract => expanded_bf.push(Opcode2D::Subtract), + ExtendedOpcode::Right => expanded_bf.push(Opcode2D::Right), + ExtendedOpcode::Left => expanded_bf.push(Opcode2D::Left), + ExtendedOpcode::OpenLoop => expanded_bf.push(Opcode2D::OpenLoop), + ExtendedOpcode::CloseLoop => expanded_bf.push(Opcode2D::CloseLoop), + ExtendedOpcode::Output => expanded_bf.push(Opcode2D::Output), + ExtendedOpcode::Input => expanded_bf.push(Opcode2D::Input), + ExtendedOpcode::Up => expanded_bf.push(Opcode2D::Up), + ExtendedOpcode::Down => expanded_bf.push(Opcode2D::Down), } } @@ -741,7 +741,7 @@ pub enum Instruction { ClearCell(CellReference), // not sure if this should be here, seems common enough that it should be AssertCellValue(CellReference, Option), // allows the user to hand-tune optimisations further OutputCell(CellReference), - InsertBrainfuckAtCell(Vec, CellLocation), + InsertBrainfuckAtCell(Vec, CellLocation), } #[derive(Debug, Clone)] diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 5efc698..a6920c6 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -53,7 +53,7 @@ pub fn wasm_compile( let tokens = tokenise(&preprocessed_file)?; let parsed_syntax = parse(&tokens)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); - let bf_code = ctx.ir_to_bf(instructions, false)?; + let bf_code = ctx.ir_to_bf(instructions, None)?; Ok(match config.optimise_generated_code { true => optimise(bf_code, config.optimise_generated_all_permutations).to_string(), diff --git a/compiler/src/main.rs b/compiler/src/main.rs index d817531..fcb339d 100644 
--- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -104,7 +104,7 @@ fn main() -> Result<(), String> { // second stage actually writes out the low-level instructions into brainfuck let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); - let bf_code = ctx.ir_to_bf(instructions, false)?; + let bf_code = ctx.ir_to_bf(instructions, None)?; match config.optimise_generated_code { true => optimise(bf_code, config.optimise_generated_all_permutations).to_string(), diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index aff4c89..ed18efe 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -4,7 +4,7 @@ #[cfg(test)] pub mod black_box_tests { use crate::{ - backend::{BrainfuckOpcodes, Opcode}, + backend::{BrainfuckOpcodes, Opcode2D}, brainfuck::{bvm_tests::run_code, BVMConfig}, misc::MastermindContext, parser::parse, @@ -94,7 +94,7 @@ pub mod black_box_tests { let tokens: Vec = tokenise(&program)?; let clauses = parse(&tokens)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); - let bf_program = ctx.ir_to_bf(instructions, false)?; + let bf_program = ctx.ir_to_bf(instructions, None)?; let bfs = bf_program.to_string(); // run generated brainfuck with input @@ -109,14 +109,14 @@ pub mod black_box_tests { fn compile_program( program: String, config: Option<&MastermindConfig>, - ) -> Result, String> { + ) -> Result, String> { let ctx = MastermindContext { config: config.unwrap_or(&OPT_NONE), }; let tokens: Vec = tokenise(&program)?; let clauses = parse(&tokens)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); - let bf_code = ctx.ir_to_bf(instructions, false)?; + let bf_code = ctx.ir_to_bf(instructions, None)?; Ok(bf_code) } diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index 04413c9..68e926a 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -9,6 +9,8 @@ pub fn tokenise(source: &String) -> Result, String> { .collect::>() .join(" "); + // 
mappings are a list of key * value tuples because we are doing "starts with" searches, + // meaning we can't look up in a hashtable let mappings = [ (" ", Token::None), (";", Token::Semicolon), From 06a00c0c0e79e0b5897706655be1766aeef18cbb Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 13 Oct 2025 16:07:10 +1100 Subject: [PATCH 08/56] Change optimisation injection for bf optimiser --- compiler/src/backend.rs | 2 +- compiler/src/brainfuck_optimiser.rs | 448 +++++++++++++++------------- compiler/src/frontend.rs | 15 +- compiler/src/lib.rs | 11 +- compiler/src/main.rs | 12 +- compiler/src/misc.rs | 5 +- compiler/src/tests.rs | 49 ++- 7 files changed, 287 insertions(+), 255 deletions(-) diff --git a/compiler/src/backend.rs b/compiler/src/backend.rs index cb6e341..d88a8fc 100644 --- a/compiler/src/backend.rs +++ b/compiler/src/backend.rs @@ -31,7 +31,7 @@ impl Display for TapeCell2D { } } -impl MastermindContext<'_> { +impl MastermindContext { pub fn ir_to_bf( &self, instructions: Vec>, diff --git a/compiler/src/brainfuck_optimiser.rs b/compiler/src/brainfuck_optimiser.rs index 7350945..af7f57f 100644 --- a/compiler/src/brainfuck_optimiser.rs +++ b/compiler/src/brainfuck_optimiser.rs @@ -1,46 +1,187 @@ -use crate::backend::{Opcode2D, TapeCell2D}; use itertools::Itertools; use std::{collections::HashMap, num::Wrapping}; -// post-compilation optimisations - -// simple naive brainfuck optimisations -// TODO: factor in [-] into optimisations (doing) - -pub fn optimise(program: Vec, exhaustive: bool) -> Vec { - let mut output = Vec::new(); - - // get stretch of characters to optimise (+-<>) - let mut i = 0; - let mut subset = Vec::new(); - while i < program.len() { - let op = program[i]; - match op { - Opcode2D::Add - | Opcode2D::Subtract - | Opcode2D::Right - | Opcode2D::Left - | Opcode2D::Clear - | Opcode2D::Up - | Opcode2D::Down => { - subset.push(op); - } - Opcode2D::OpenLoop | Opcode2D::CloseLoop | Opcode2D::Input | Opcode2D::Output => { - // optimise subset 
and push - let optimised_subset = optimise_subset(subset, exhaustive); - output.extend(optimised_subset); +use crate::{ + backend::{Opcode2D, TapeCell2D}, + misc::MastermindContext, +}; + +// originally trivial post-compilation brainfuck optimisations +// extended to 2D which makes it more difficult +impl MastermindContext { + pub fn optimise_bf_code(&self, program: Vec) -> Vec { + let mut output = Vec::new(); + + // get stretch of characters to optimise (+-<>) + let mut i = 0; + let mut subset = Vec::new(); + while i < program.len() { + let op = program[i]; + match op { + Opcode2D::Add + | Opcode2D::Subtract + | Opcode2D::Right + | Opcode2D::Left + | Opcode2D::Clear + | Opcode2D::Up + | Opcode2D::Down => { + subset.push(op); + } + Opcode2D::OpenLoop | Opcode2D::CloseLoop | Opcode2D::Input | Opcode2D::Output => { + // optimise subset and push + let optimised_subset = self.optimise_subset(subset); + output.extend(optimised_subset); - subset = Vec::new(); - output.push(op); + subset = Vec::new(); + output.push(op); + } } + i += 1; } - i += 1; + + output } - output + fn optimise_subset(&self, run: Vec) -> Vec { + #[derive(Clone)] + enum Change { + Add(Wrapping), + Set(Wrapping), + } + let mut tape: HashMap = HashMap::new(); + let start = TapeCell2D(0, 0); + let mut head = TapeCell2D(0, 0); + let mut i = 0; + // simulate the subprogram to find the exact changes made to the tape + while i < run.len() { + let op = run[i]; + match op { + Opcode2D::Clear => { + tape.insert(head, Change::Set(Wrapping(0i8))); + } + Opcode2D::Subtract | Opcode2D::Add => { + let mut change = tape.remove(&head).unwrap_or(Change::Add(Wrapping(0i8))); + + let (Change::Add(val) | Change::Set(val)) = &mut change; + *val += match op { + Opcode2D::Add => 1, + Opcode2D::Subtract => -1, + _ => 0, + }; + + match &change { + Change::Add(val) => { + if *val != Wrapping(0i8) { + tape.insert(head, change); + } + } + Change::Set(_) => { + tape.insert(head, change); + } + } + } + Opcode2D::Right => { + head.0 
+= 1; + } + Opcode2D::Left => { + head.0 -= 1; + } + Opcode2D::Up => { + head.1 += 1; + } + Opcode2D::Down => { + head.1 -= 1; + } + _ => (), + } + i += 1; + } + let mut output = Vec::new(); + if self.config.optimise_generated_all_permutations { + //Exhaustive approach checks all permutations + let mut output_length = i32::MAX; + let mut best_permutation = Vec::new(); + for perm in tape.iter().permutations(tape.len()) { + let mut position = start; + let mut current_output_length = 0; + //Calculate the distance of this + for (cell, _) in &perm { + current_output_length += (cell.0 - position.0).abs(); + current_output_length += (cell.1 - position.1).abs(); + position = **cell; + if current_output_length > output_length { + break; + } + } + if current_output_length > output_length { + continue; + } + //Add the distance to the finishing location + current_output_length += (head.0 - position.0).abs(); + current_output_length += (head.1 - position.1).abs(); + if current_output_length < output_length { + best_permutation = perm; + output_length = current_output_length; + } + } + let mut position = start; + for (cell, change) in best_permutation { + output = _move_position(output, &position, cell); + position = *cell; + if let Change::Set(_) = change { + output.push(Opcode2D::Clear); + } + let (Change::Add(v) | Change::Set(v)) = change; + let v = v.0; + for _ in 0..(v as i32).abs() { + output.push(match v == -128 || v > 0 { + true => Opcode2D::Add, + false => Opcode2D::Subtract, + }); + } + } + output = _move_position(output, &position, &head); + } else { + //Greedy approach faster for bigger datasets + let mut position = start; + //For the number of cells navigate to the nearest cell + for _ in 0..tape.len() { + if !tape.is_empty() { + let mut min_distance = i32::MAX; + let mut next_position = TapeCell2D(0, 0); + for (cell, _value) in tape.iter() { + if (cell.0 - position.0).abs() + (cell.1 - position.1).abs() < min_distance + { + min_distance = + (cell.0 - 
position.0).abs() + (cell.1 - position.1).abs(); + next_position = *cell; + } + } + // Move to next position + output = _move_position(output, &position, &next_position); + position = next_position; + //Now Update the output with correct opcodes + let change = tape.remove(&next_position).unwrap(); + if let Change::Set(_) = change { + output.push(Opcode2D::Clear); + } + let (Change::Add(v) | Change::Set(v)) = change; + let v = v.0; + for _ in 0..(v as i32).abs() { + output.push(match v == -128 || v > 0 { + true => Opcode2D::Add, + false => Opcode2D::Subtract, + }); + } + } + } + output = _move_position(output, &position, &head); + } + output + } } -fn move_position( +fn _move_position( mut program: Vec, old_position: &TapeCell2D, new_position: &TapeCell2D, @@ -68,159 +209,54 @@ fn move_position( program } -fn optimise_subset(run: Vec, exhaustive: bool) -> Vec { - #[derive(Clone)] - enum Change { - Add(Wrapping), - Set(Wrapping), - } - let mut tape: HashMap = HashMap::new(); - let start = TapeCell2D(0, 0); - let mut head = TapeCell2D(0, 0); - let mut i = 0; - //Generate a map of cells we change and how we plan to change them - while i < run.len() { - let op = run[i]; - match op { - Opcode2D::Clear => { - tape.insert(head, Change::Set(Wrapping(0i8))); - } - Opcode2D::Subtract | Opcode2D::Add => { - let mut change = tape.remove(&head).unwrap_or(Change::Add(Wrapping(0i8))); - - let (Change::Add(val) | Change::Set(val)) = &mut change; - *val += match op { - Opcode2D::Add => 1, - Opcode2D::Subtract => -1, - _ => 0, - }; - - match &change { - Change::Add(val) => { - if *val != Wrapping(0i8) { - tape.insert(head, change); - } - } - Change::Set(_) => { - tape.insert(head, change); - } - } - } - Opcode2D::Right => { - head.0 += 1; - } - Opcode2D::Left => { - head.0 -= 1; - } - Opcode2D::Up => { - head.1 += 1; - } - Opcode2D::Down => { - head.1 -= 1; - } - _ => (), - } - i += 1; - } - let mut output = Vec::new(); - if exhaustive { - //Exhaustive approach checks all 
permutations - let mut output_length = i32::MAX; - let mut best_permutation = Vec::new(); - for perm in tape.iter().permutations(tape.len()) { - let mut position = start; - let mut current_output_length = 0; - //Calculate the distance of this - for (cell, _) in &perm { - current_output_length += (cell.0 - position.0).abs(); - current_output_length += (cell.1 - position.1).abs(); - position = **cell; - if current_output_length > output_length { - break; - } - } - if current_output_length > output_length { - continue; - } - //Add the distance to the finishing location - current_output_length += (head.0 - position.0).abs(); - current_output_length += (head.1 - position.1).abs(); - if current_output_length < output_length { - best_permutation = perm; - output_length = current_output_length; - } - } - let mut position = start; - for (cell, change) in best_permutation { - output = move_position(output, &position, cell); - position = *cell; - if let Change::Set(_) = change { - output.push(Opcode2D::Clear); - } - let (Change::Add(v) | Change::Set(v)) = change; - let v = v.0; - for _ in 0..(v as i32).abs() { - output.push(match v == -128 || v > 0 { - true => Opcode2D::Add, - false => Opcode2D::Subtract, - }); - } - } - output = move_position(output, &position, &head); - } else { - //Greedy approach faster for bigger datasets - let mut position = start; - //For the number of cells navigate to the nearest cell - for _ in 0..tape.len() { - if !tape.is_empty() { - let mut min_distance = i32::MAX; - let mut next_position = TapeCell2D(0, 0); - for (cell, _value) in tape.iter() { - if (cell.0 - position.0).abs() + (cell.1 - position.1).abs() < min_distance { - min_distance = (cell.0 - position.0).abs() + (cell.1 - position.1).abs(); - next_position = *cell; - } - } - // Move to next position - output = move_position(output, &position, &next_position); - position = next_position; - //Now Update the output with correct opcodes - let change = tape.remove(&next_position).unwrap(); - 
if let Change::Set(_) = change { - output.push(Opcode2D::Clear); - } - let (Change::Add(v) | Change::Set(v)) = change; - let v = v.0; - for _ in 0..(v as i32).abs() { - output.push(match v == -128 || v > 0 { - true => Opcode2D::Add, - false => Opcode2D::Subtract, - }); - } - } - } - output = move_position(output, &position, &head); - } - output -} - #[cfg(test)] mod bf_optimiser_tests { - use crate::backend::BrainfuckOpcodes; - - use super::*; + use crate::{ + backend::BrainfuckOpcodes, + misc::{MastermindConfig, MastermindContext}, + }; + + const CTX_OPT: MastermindContext = MastermindContext { + config: MastermindConfig { + optimise_generated_code: true, + optimise_generated_all_permutations: false, + optimise_cell_clearing: false, + optimise_unreachable_loops: false, + optimise_variable_usage: false, + optimise_memory_allocation: false, + optimise_constants: false, + optimise_empty_blocks: false, + memory_allocation_method: 0, + enable_2d_grid: false, + }, + }; + + const CTX_OPT_EXHAUSTIVE: MastermindContext = MastermindContext { + config: MastermindConfig { + optimise_generated_code: true, + optimise_generated_all_permutations: true, + optimise_cell_clearing: false, + optimise_unreachable_loops: false, + optimise_variable_usage: false, + optimise_memory_allocation: false, + optimise_constants: false, + optimise_empty_blocks: false, + memory_allocation_method: 0, + enable_2d_grid: false, + }, + }; #[test] fn greedy_subset_equivalence_test_0() { let v = BrainfuckOpcodes::from_str("+++>><<++>--->+++<><><><><<<<<+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "+++++>--->+++<<<<<+++"); } #[test] fn greedy_program_equivalence_test_0() { let v = BrainfuckOpcodes::from_str("<><><>++<+[--++>>+<<-]"); - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "++<+[->>+<<]"); } @@ -229,28 +265,28 @@ mod 
bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "+++++++++>>+++++++>---->>>++<<<<[>++<]"); } #[test] fn greedy_program_equivalence_test_2() { let v = BrainfuckOpcodes::from_str(">><."); - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, ">."); } #[test] fn greedy_subset_equivalence_test_1() { let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++<+++>"); } #[test] fn greedy_subset_equivalence_test_2() { let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++[-]<[-]--+>-"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]-<[-]->"); } @@ -259,21 +295,21 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "+++++[-]+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "[-]+++++++++>>+++++++>---->>>++<<<<[[-]+>++<]"); } #[test] fn greedy_two_dimensional_subset_equivalence_test_0() { let v = BrainfuckOpcodes::from_str("+++^^vv++^---^+++v^v^v^v^vvvvv+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "+++++^---^+++vvvvv+++"); } #[test] fn greedy_two_dimensional_program_equivalence_test_0() { let v = BrainfuckOpcodes::from_str("v^v^v^++v+[--++^^+vv-]"); - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "++v+[-^^+vv]"); } @@ 
-282,28 +318,28 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "+++++++++^^+++^----^^^++++--v--++vvhellov++++[-v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "+++++++++^^+++++++^----^^^++vvvv[^++v]"); } #[test] fn greedy_two_dimensional_program_equivalence_test_2() { let v = BrainfuckOpcodes::from_str("^^v."); - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "^."); } #[test] fn greedy_two_dimensional_subset_equivalence_test_1() { let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++v+++^"); } #[test] fn greedy_two_dimensional_subset_equivalence_test_2() { let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++[-]v[-]--+^-"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, false).to_string(); + let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]-v[-]-^"); } @@ -312,7 +348,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "+++++[-]+++++++++^^+++^----^^^++++--v--++vvhellov++++[[-]v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, false).to_string(); + let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "[-]+++++++++^^+++++++^----^^^++vvvv[[-]+^++v]"); } @@ -320,7 +356,7 @@ mod bf_optimiser_tests { #[ignore] fn exhaustive_subset_equivalence_test_0() { let v = BrainfuckOpcodes::from_str("+++>><<++>--->+++<><><><><<<<<+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, ">--->+++<<+++++<<<+++"); } @@ -328,7 +364,7 @@ mod bf_optimiser_tests { #[ignore] fn exhaustive_program_equivalence_test_0() { let v = 
BrainfuckOpcodes::from_str("<><><>++<+[--++>>+<<-]"); - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "++<+[>>+<<-]"); } @@ -338,7 +374,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "+++++++++>>+++++++>>>>++<<<----<[>++<]"); } @@ -346,7 +382,7 @@ mod bf_optimiser_tests { #[ignore] fn exhaustive_program_equivalence_test_2() { let v = BrainfuckOpcodes::from_str(">><."); - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, ">."); } @@ -354,7 +390,7 @@ mod bf_optimiser_tests { #[ignore] fn exhaustive_subset_equivalence_test_1() { let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++<+++>"); } @@ -362,7 +398,7 @@ mod bf_optimiser_tests { #[ignore] fn exhaustive_subset_equivalence_test_2() { let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++[-]<[-]--+>-"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]-<[-]->"); } @@ -372,7 +408,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "+++++[-]+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "[-]+++++++++>>+++++++>---->>>++<<<<[[-]+>++<]"); } @@ -380,7 +416,7 @@ mod bf_optimiser_tests { #[ignore] fn exhaustive_two_dimensional_subset_equivalence_test_0() { let v = 
BrainfuckOpcodes::from_str("+++^^vv++^---^+++v^v^v^v^vvvvv+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "^^+++v---v+++++vvv+++"); } @@ -388,7 +424,7 @@ mod bf_optimiser_tests { #[ignore] fn exhaustive_two_dimensional_program_equivalence_test_0() { let v = BrainfuckOpcodes::from_str("v^v^v^++v+[--++^^+vv-]"); - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "++v+[^^+vv-]"); } @@ -398,7 +434,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "+++++++++^^+++^----^^^++++--v--++vvhellov++++[-v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "+++++++++^^+++++++^----^^^++vvvv[^++v]"); } @@ -406,7 +442,7 @@ mod bf_optimiser_tests { #[ignore] fn exhaustive_two_dimensional_program_equivalence_test_2() { let v = BrainfuckOpcodes::from_str("^^v."); - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "^."); } @@ -414,7 +450,7 @@ mod bf_optimiser_tests { #[ignore] fn exhaustive_two_dimensional_subset_equivalence_test_1() { let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++v+++^"); } @@ -422,7 +458,7 @@ mod bf_optimiser_tests { #[ignore] fn exhaustive_two_dimensional_subset_equivalence_test_2() { let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++[-]v[-]--+^-"); //(3) 0 0 [5] -3 3 - let o = optimise_subset(v, true).to_string(); + let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]-v[-]-^"); } @@ -432,7 +468,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( 
"+++++[-]+++++++++^^+++^----^^^++++--v--++vvhellov++++[[-]v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 - let o: String = optimise(v, true).to_string(); + let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "[-]+++++++++^^^^^^++vvv----v+++++++[^++v[-]+]"); } @@ -440,7 +476,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "-++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++", ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 127); } @@ -450,7 +486,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++", ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 128); } @@ -460,7 +496,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "+--------------------------------------------------------------------------------------------------------------------------------" ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 127); } @@ -470,7 +506,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "--------------------------------------------------------------------------------------------------------------------------------" ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 128); } @@ -480,7 +516,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "- 
--------------------------------------------------------------------------------------------------------------------------------" ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 127); } @@ -490,7 +526,7 @@ mod bf_optimiser_tests { let v = BrainfuckOpcodes::from_str( "[-]--------------------------------------------------------------------------------------------------------------------------------" ); - let o: String = optimise_subset(v, false).to_string(); + let o: String = CTX_OPT.optimise_subset(v).to_string(); println!("{o}"); assert_eq!(o.len(), 131); } diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index c6b94a7..98f15df 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -14,9 +14,9 @@ use crate::{ // memory stuff is all WIP and some comments may be incorrect -impl MastermindContext<'_> { +impl MastermindContext { pub fn create_ir_scope<'a>( - &'a self, + &self, clauses: &[Clause], outer_scope: Option<&'a ScopeBuilder>, ) -> Result, String> { @@ -492,16 +492,13 @@ impl MastermindContext<'_> { // create a scope object for functions from the outside scope let functions_scope = scope.open_inner_templates_only(); // compile the block and extend the operations - - let ctx = MastermindContext { - config: &self.config, - }; - let instructions = ctx + let instructions = self .create_ir_scope(&mm_clauses, Some(&functions_scope))? 
+ // compile without cleaning up top level variables, this is the brainfuck programmer's responsibility .build_ir(false); - // compile without cleaning up top level variables, this is the brainfuck programmer's responsibility // it is also the brainfuck programmer's responsibility to return to the start position - let bf_code = ctx.ir_to_bf(instructions, Some(TapeCell2D(0, 0)))?; + let bf_code = + self.ir_to_bf(instructions, Some(TapeCell2D(0, 0)))?; expanded_bf.extend(bf_code); } ExtendedOpcode::Add => expanded_bf.push(Opcode2D::Add), diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index a6920c6..6f4b3f0 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -17,8 +17,6 @@ mod tests; use backend::BrainfuckOpcodes; use brainfuck::{BVMConfig, BVM}; -use brainfuck_optimiser::optimise; -use misc::MastermindConfig; use parser::parse; use preprocessor::preprocess_from_memory; use tokeniser::tokenise; @@ -46,8 +44,9 @@ pub fn wasm_compile( let file_contents: HashMap = serde_wasm_bindgen::from_value(file_contents).unwrap(); - let config: MastermindConfig = serde_wasm_bindgen::from_value(config).unwrap(); - let ctx = MastermindContext { config: &config }; + let ctx = MastermindContext { + config: serde_wasm_bindgen::from_value(config).unwrap(), + }; let preprocessed_file = preprocess_from_memory(&file_contents, entry_file_name)?; let tokens = tokenise(&preprocessed_file)?; @@ -55,8 +54,8 @@ pub fn wasm_compile( let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; - Ok(match config.optimise_generated_code { - true => optimise(bf_code, config.optimise_generated_all_permutations).to_string(), + Ok(match ctx.config.optimise_generated_code { + true => ctx.optimise_bf_code(bf_code).to_string(), false => bf_code.to_string(), }) } diff --git a/compiler/src/main.rs b/compiler/src/main.rs index fcb339d..49094d6 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -16,7 +16,6 @@ mod 
tests; use backend::BrainfuckOpcodes; use brainfuck::{BVMConfig, BVM}; -use brainfuck_optimiser::optimise; use misc::MastermindConfig; use parser::parse; use preprocessor::preprocess; @@ -74,9 +73,10 @@ fn main() -> Result<(), String> { let args = Arguments::parse(); - // TODO: change this to not be a bitmask, or at least document it - let config = MastermindConfig::new(args.optimise); - let ctx = MastermindContext { config: &config }; + let ctx = MastermindContext { + // TODO: change this to not be a bitmask, or at least document it + config: MastermindConfig::new(args.optimise), + }; let program; match args.file { @@ -106,8 +106,8 @@ fn main() -> Result<(), String> { let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; - match config.optimise_generated_code { - true => optimise(bf_code, config.optimise_generated_all_permutations).to_string(), + match ctx.config.optimise_generated_code { + true => ctx.optimise_bf_code(bf_code).to_string(), false => bf_code.to_string(), } } diff --git a/compiler/src/misc.rs b/compiler/src/misc.rs index f592be5..dfb2b21 100644 --- a/compiler/src/misc.rs +++ b/compiler/src/misc.rs @@ -2,6 +2,7 @@ pub struct MastermindConfig { // basic pure brainfuck optimisations pub optimise_generated_code: bool, + // TODO: rename this: (turn on exhaustive search for solving 2D brainfuck optimisation) pub optimise_generated_all_permutations: bool, // track cell value and clear with constant addition if possible pub optimise_cell_clearing: bool, @@ -61,6 +62,6 @@ impl MastermindConfig { } } -pub struct MastermindContext<'a> { - pub config: &'a MastermindConfig, +pub struct MastermindContext { + pub config: MastermindConfig, } diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index ed18efe..984978a 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -6,10 +6,9 @@ pub mod black_box_tests { use crate::{ backend::{BrainfuckOpcodes, Opcode2D}, 
brainfuck::{bvm_tests::run_code, BVMConfig}, - misc::MastermindContext, + misc::{MastermindConfig, MastermindContext}, parser::parse, tokeniser::{tokenise, Token}, - MastermindConfig, }; // TODO: run test suite with different optimisations turned on const OPT_NONE: MastermindConfig = MastermindConfig { @@ -90,7 +89,7 @@ pub mod black_box_tests { const TESTING_BVM_MAX_STEPS: usize = 100_000_000; fn compile_and_run(program: String, input: String) -> Result { - let ctx = MastermindContext { config: &OPT_NONE }; + let ctx = MastermindContext { config: OPT_NONE }; let tokens: Vec = tokenise(&program)?; let clauses = parse(&tokens)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); @@ -108,10 +107,10 @@ pub mod black_box_tests { fn compile_program( program: String, - config: Option<&MastermindConfig>, + config: Option, ) -> Result, String> { let ctx = MastermindContext { - config: config.unwrap_or(&OPT_NONE), + config: config.unwrap_or(OPT_NONE), }; let tokens: Vec = tokenise(&program)?; let clauses = parse(&tokens)?; @@ -567,7 +566,7 @@ output x + 'f'; ); let input = String::from(""); let desired_output = String::from("f"); - let code = compile_program(program, Some(&OPT_ALL))?; + let code = compile_program(program, Some(OPT_ALL))?; assert_eq!( desired_output, run_code(BVM_CONFIG_1D, code.to_string(), input, None) @@ -587,7 +586,7 @@ output x + 'f'; ); let input = String::from(""); let desired_output = String::from("f"); - let code = compile_program(program, Some(&OPT_ALL))?; + let code = compile_program(program, Some(OPT_ALL))?; assert_eq!( desired_output, run_code(BVM_CONFIG_1D, code.to_string(), input, None) @@ -867,7 +866,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("01231\n"); - let code = compile_program(program, Some(&OPT_NONE))?.to_string(); + let code = compile_program(program, Some(OPT_NONE))?.to_string(); println!("{}", code); let output = run_code(BVM_CONFIG_1D, code, input, None); 
println!("{output}"); @@ -2606,7 +2605,7 @@ a = 0; output a; "#, ); - let code = compile_program(program, Some(&OPT_ALL))?.to_string(); + let code = compile_program(program, Some(OPT_ALL))?.to_string(); println!("{code}"); assert!(code.starts_with("+++++.--.")); @@ -2624,7 +2623,7 @@ a = 0; output a; "#, ); - let code = compile_program(program, Some(&OPT_ALL))?.to_string(); + let code = compile_program(program, Some(OPT_ALL))?.to_string(); println!("{code}"); assert!(code.starts_with("++.[-].")); @@ -2868,7 +2867,7 @@ output 'h'; let input = String::from(""); let desired_output = String::from("h"); - let code = compile_program(program, Some(&OPT_ALL))?; + let code = compile_program(program, Some(OPT_ALL))?; println!("{}", code.clone().to_string()); assert_eq!( desired_output, @@ -2894,7 +2893,7 @@ output a + 3; let input = String::from(""); let desired_output = String::from("tIJ"); - let code = compile_program(program, Some(&OPT_ALL))?.to_string(); + let code = compile_program(program, Some(OPT_ALL))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_1D, code, input, None)); @@ -2921,7 +2920,7 @@ output a + 3; memory_allocation_method: 128, enable_2d_grid: false, }; - let _code = compile_program(program, Some(&cfg)); + let _code = compile_program(program, Some(cfg)); } #[test] fn tiles_memory_allocation_1() -> Result<(), String> { @@ -2940,7 +2939,7 @@ cell j = 1; ); let desired_output = String::from("+vv+^^+>vv+^+^+"); - let code = compile_program(program, Some(&OPT_NONE_TILES))?.to_string(); + let code = compile_program(program, Some(OPT_NONE_TILES))?.to_string(); assert_eq!(desired_output, code); Ok(()) @@ -2972,7 +2971,7 @@ output i; let input = String::from(""); let desired_output = String::from("123456789"); - let code = compile_program(program, Some(&OPT_NONE_TILES))?.to_string(); + let code = compile_program(program, Some(OPT_NONE_TILES))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_2D, 
code, input, None)); @@ -2987,7 +2986,7 @@ cell a @2,4 = 1; cell[4] b @0,4; "#, ); - let code = compile_program(program, Some(&OPT_NONE_TILES)); + let code = compile_program(program, Some(OPT_NONE_TILES)); assert!(code.is_err()); assert!(code .unwrap_err() @@ -3013,7 +3012,7 @@ output b[3]; output a; "#, ); - let code = compile_program(program, Some(&OPT_NONE_TILES))?.to_string(); + let code = compile_program(program, Some(OPT_NONE_TILES))?.to_string(); println!("{}", code); let input = String::from(""); let desired_output = String::from("12345"); @@ -3038,7 +3037,7 @@ cell j = 1; ); let desired_output = String::from("+>+<^+>>v+<^+<^+>>>vv+<^+<^+"); - let code = compile_program(program, Some(&OPT_NONE_ZIG_ZAG))?.to_string(); + let code = compile_program(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); assert_eq!(desired_output, code); Ok(()) @@ -3070,7 +3069,7 @@ output i; let input = String::from(""); let desired_output = String::from("123456789"); - let code = compile_program(program, Some(&OPT_NONE_ZIG_ZAG))?.to_string(); + let code = compile_program(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); @@ -3085,7 +3084,7 @@ cell a @2,4 = 1; cell[4] b @0,4; "#, ); - let code = compile_program(program, Some(&OPT_NONE_ZIG_ZAG)); + let code = compile_program(program, Some(OPT_NONE_ZIG_ZAG)); assert!(code.is_err()); assert!(code .unwrap_err() @@ -3111,7 +3110,7 @@ output b[3]; output a; "#, ); - let code = compile_program(program, Some(&OPT_NONE_ZIG_ZAG))?.to_string(); + let code = compile_program(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); println!("{}", code); let input = String::from(""); let desired_output = String::from("12345"); @@ -3136,7 +3135,7 @@ cell j = 1; ); let desired_output = String::from("^+>+v+<+<+^+^+>+>+"); - let code = compile_program(program, Some(&OPT_NONE_SPIRAL))?.to_string(); + let code = compile_program(program, Some(OPT_NONE_SPIRAL))?.to_string(); 
assert_eq!(desired_output, code); Ok(()) @@ -3168,7 +3167,7 @@ output i; let input = String::from(""); let desired_output = String::from("123456789"); - let code = compile_program(program, Some(&OPT_NONE_SPIRAL))?.to_string(); + let code = compile_program(program, Some(OPT_NONE_SPIRAL))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); @@ -3183,7 +3182,7 @@ cell a @2,4 = 1; cell[4] b @0,4; "#, ); - let code = compile_program(program, Some(&OPT_NONE_SPIRAL)); + let code = compile_program(program, Some(OPT_NONE_SPIRAL)); assert!(code.is_err()); assert!(code .unwrap_err() @@ -3209,7 +3208,7 @@ output b[3]; output a; "#, ); - let code = compile_program(program, Some(&OPT_NONE_SPIRAL))?.to_string(); + let code = compile_program(program, Some(OPT_NONE_SPIRAL))?.to_string(); println!("{}", code); let input = String::from(""); let desired_output = String::from("12345"); From f15cb7453ec03f7d37aaca1745babdecd4a56871 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Tue, 14 Oct 2025 11:33:19 +1100 Subject: [PATCH 09/56] Add tests for parser + --- compiler/src/backend.rs | 2 +- compiler/src/brainfuck.rs | 115 +++++++------------------------------- compiler/src/frontend.rs | 18 +++--- compiler/src/main.rs | 2 +- compiler/src/parser.rs | 99 +++++++++++++++++++++++--------- 5 files changed, 104 insertions(+), 132 deletions(-) diff --git a/compiler/src/backend.rs b/compiler/src/backend.rs index d88a8fc..b83b599 100644 --- a/compiler/src/backend.rs +++ b/compiler/src/backend.rs @@ -21,7 +21,7 @@ use crate::{ type LoopDepth = usize; type TapeValue = u8; -#[derive(PartialEq, Clone, Hash, Eq, Copy)] +#[derive(PartialEq, Clone, Hash, Eq, Copy, Debug)] pub struct TapeCell2D(pub i32, pub i32); impl Display for TapeCell2D { diff --git a/compiler/src/brainfuck.rs b/compiler/src/brainfuck.rs index 6f41d39..f62cd20 100644 --- a/compiler/src/brainfuck.rs +++ b/compiler/src/brainfuck.rs @@ -8,29 +8,29 @@ use std::{ num::Wrapping, }; 
-use crate::macros::macros::r_panic; +use crate::{backend::TapeCell2D, macros::macros::r_panic}; use wasm_bindgen::{JsCast, JsValue}; use wasm_bindgen_futures::JsFuture; -struct Tape { - memory_map: HashMap<(i32, i32), Wrapping>, - head_position: (i32, i32), +struct Tape { + memory_map: HashMap>, + head_position: TapeCell, } -impl Tape { +impl Tape { fn new() -> Self { Tape { memory_map: HashMap::new(), - head_position: (0, 0), + head_position: TapeCell2D(0, 0), } } - fn get_cell(&self, position: (i32, i32)) -> Wrapping { + fn get_cell(&self, position: TapeCell2D) -> Wrapping { match self.memory_map.get(&position) { Some(val) => *val, None => Wrapping(0), } } - fn move_head_position(&mut self, amount: (i32, i32)) { + fn move_head_position(&mut self, amount: TapeCell2D) { self.head_position.0 += amount.0; self.head_position.1 += amount.1; } @@ -57,69 +57,6 @@ impl Tape { } } -impl fmt::Display for Tape { - // absolutely horrible code here, not even used ever so should just get rid of it - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut line_0 = String::with_capacity(50); - let mut line_1 = String::with_capacity(50); - let mut line_2 = String::with_capacity(50); - let mut line_3 = String::with_capacity(50); - let mut line_4 = String::with_capacity(50); - - // disgusting - line_0.push('|'); - line_1.push('|'); - line_2.push('|'); - line_3.push('|'); - line_4.push('|'); - - for pos in (self.head_position.1 - 10)..(self.head_position.1 + 10) { - let val = self.get_cell((pos, 0)).0; - let mut dis = 32u8; - if val.is_ascii_alphanumeric() || val.is_ascii_punctuation() { - dis = val; - } - - // dodgy af, I don't know rust or the best way but I know this isn't - line_0.push_str(format!("{val:03}").as_str()); - - line_1.push_str(format!("{:3}", (val as i8)).as_str()); - - line_2.push_str(format!(" {val:02x}").as_str()); - - line_3.push(' '); - line_3.push(' '); - line_3.push(dis as char); - - line_4 += match pos == self.head_position.1 { - true => "^^^", - 
false => "---", - }; - - line_0.push('|'); - line_1.push('|'); - line_2.push('|'); - line_3.push('|'); - line_4.push('|'); - } - - // disgusting but I just want this to work - let _ = f.write_str("\n"); - let _ = f.write_str(&line_0); - let _ = f.write_str("\n"); - let _ = f.write_str(&line_1); - let _ = f.write_str("\n"); - let _ = f.write_str(&line_2); - let _ = f.write_str("\n"); - let _ = f.write_str(&line_3); - let _ = f.write_str("\n"); - let _ = f.write_str(&line_4); - let _ = f.write_str("\n"); - - Ok(()) - } -} - pub struct BVMConfig { pub enable_debug_symbols: bool, pub enable_2d_grid: bool, @@ -127,7 +64,7 @@ pub struct BVMConfig { pub struct BVM { config: BVMConfig, - tape: Tape, + tape: Tape, program: Vec, } @@ -153,9 +90,8 @@ impl BVM { // TODO: refactor/rewrite this, can definitely be improved with async read/write traits or similar // I don't love that I duplicated this to make it work with js // TODO: this isn't covered by unit tests - // TODO: add a maximum step count pub async fn run_async( - &mut self, + mut self, output_callback: &js_sys::Function, input_callback: &js_sys::Function, ) -> Result { @@ -174,19 +110,6 @@ impl BVM { ('+', _, _) => self.tape.increment_current_cell(Wrapping(1)), ('-', _, _) => self.tape.increment_current_cell(Wrapping(-1i8 as u8)), (',', _, _) => { - // https://github.com/rustwasm/wasm-bindgen/issues/2195 - // let password_jsval: JsValue = func.call1(&this, &JsValue::from_bool(true))?; - // let password_promise_res: Result = - // password_jsval.dyn_into(); - // let password_promise = password_promise_res - // .map_err(|_| "Function askUnlockPassword does not return a Promise") - // .map_err(err_to_js)?; - // let password_jsstring = JsFuture::from(password_promise).await?; - // let password = password_jsstring - // .as_string() - // .ok_or("Promise didn't return a String") - // .map_err(err_to_js)?; - // TODO: handle errors let jsval = input_callback .call0(&JsValue::null()) @@ -215,10 +138,10 @@ impl BVM { 
output_bytes.push(byte); } ('>', _, _) => { - self.tape.move_head_position((1, 0)); + self.tape.move_head_position(TapeCell2D(1, 0)); } ('<', _, _) => { - self.tape.move_head_position((-1, 0)); + self.tape.move_head_position(TapeCell2D(-1, 0)); } ('[', _, _) => { // entering a loop @@ -249,8 +172,8 @@ impl BVM { pc = loop_stack[loop_stack.len() - 1]; } } - ('^', _, true) => self.tape.move_head_position((0, 1)), - ('v', _, true) => self.tape.move_head_position((0, -1)), + ('^', _, true) => self.tape.move_head_position(TapeCell2D(0, 1)), + ('v', _, true) => self.tape.move_head_position(TapeCell2D(0, -1)), ('^', _, false) => { r_panic!("2D Brainfuck currently disabled"); } @@ -279,7 +202,7 @@ impl BVM { } pub fn run( - &mut self, + mut self, input: &mut impl Read, output: &mut impl Write, max_steps: Option, @@ -307,10 +230,10 @@ impl BVM { let _ = output.write(&buf); } ('>', _, _) => { - self.tape.move_head_position((1, 0)); + self.tape.move_head_position(TapeCell2D(1, 0)); } ('<', _, _) => { - self.tape.move_head_position((-1, 0)); + self.tape.move_head_position(TapeCell2D(-1, 0)); } ('[', _, _) => { // entering a loop @@ -341,8 +264,8 @@ impl BVM { pc = loop_stack[loop_stack.len() - 1]; } } - ('^', _, true) => self.tape.move_head_position((0, 1)), - ('v', _, true) => self.tape.move_head_position((0, -1)), + ('^', _, true) => self.tape.move_head_position(TapeCell2D(0, 1)), + ('v', _, true) => self.tape.move_head_position(TapeCell2D(0, -1)), ('^', _, false) => { r_panic!("2D Brainfuck currently disabled"); } diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index 98f15df..e9d2901 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -1171,14 +1171,19 @@ impl ScopeBuilder<'_, TapeCell2D> { LocationSpecifier::None => None, LocationSpecifier::Cell(l) => { // assert the y coordinate is 0 - r_assert!(l.1 == 0, "Struct field location specifiers do not support 2D grid cells: {var_def}"); + r_assert!( + l.1 == 0, + "Struct field location 
specifiers do not support 2D grid cells: {var_def}" + ); r_assert!( l.0 >= 0, "Struct field location specifiers must be non-negative: {var_def}" ); Some(l.0 as usize) } - LocationSpecifier::Variable(_) => r_panic!("Location specifiers in struct definitions must be relative, not variables: {var_def}"), + LocationSpecifier::Variable(_) => { + r_panic!( "Location specifiers in struct definitions must be relative, not variables: {var_def}") + } }; absolute_fields.push((var_def.name, absolute_type, non_neg_location_specifier)); } @@ -1233,14 +1238,14 @@ impl ScopeBuilder<'_, TapeCell2D> { /// Recursively find the definition of a struct type by searching up the scope call stack fn get_struct_definition(&self, struct_name: &str) -> Result<&DictStructType, String> { - Ok(if let Some(struct_def) = self.structs.get(struct_name) { - struct_def + if let Some(struct_def) = self.structs.get(struct_name) { + Ok(struct_def) } else if let Some(outer_scope) = self.outer_scope { // recurse - outer_scope.get_struct_definition(struct_name)? 
+ outer_scope.get_struct_definition(struct_name) } else { r_panic!("No definition found for struct \"{struct_name}\"."); - }) + } } /// Construct an absolute type from a type reference @@ -1495,7 +1500,6 @@ impl ScopeBuilder<'_, TapeCell2D> { /// Return the absolute type and memory allocation for a variable name fn get_base_variable_memory(&self, var_name: &str) -> Result<(&ValueType, &Memory), String> { - // TODO: add function argument translations and embedded bf/mmi scope function restrictions match (self.outer_scope, self.variable_memory.get(var_name)) { (_, Some((value_type, memory))) => Ok((value_type, memory)), (Some(outer_scope), None) => outer_scope.get_base_variable_memory(var_name), diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 49094d6..3ac1af6 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -101,7 +101,7 @@ fn main() -> Result<(), String> { // compile syntax tree into brainfuck // 2 stage compilation step, first stage compiles syntax tree into low-level instructions - // second stage actually writes out the low-level instructions into brainfuck + // second stage translates the low-level instructions into brainfuck let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index fb16205..2d226db 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -5,67 +5,67 @@ use crate::{ }; use std::{fmt::Display, mem::discriminant, num::Wrapping}; -// recursive function to create a tree representation of the program +/// recursive function to create a tree representation of the program pub fn parse(tokens: &[Token]) -> Result>, String> { // basic steps: // chew off tokens from the front, recursively parse blocks of tokens let mut clauses = Vec::new(); let mut i = 0usize; - while let Some(clause) = get_clause_tokens(&tokens[i..])? { + while let Some(clause_tokens) = get_clause_tokens(&tokens[i..])? 
{ match ( - &clause[0], - &clause.get(1).unwrap_or(&Token::None), - &clause.get(2).unwrap_or(&Token::None), + &clause_tokens[0], + &clause_tokens.get(1).unwrap_or(&Token::None), + &clause_tokens.get(2).unwrap_or(&Token::None), ) { (Token::Cell, _, _) | (Token::Struct, Token::Name(_), Token::Name(_) | Token::OpenSquareBracket) => { - clauses.push(parse_let_clause(clause)?); + clauses.push(parse_let_clause(clause_tokens)?); } (Token::Struct, Token::Name(_), Token::OpenBrace) => { - clauses.push(parse_struct_clause(clause)?); + clauses.push(parse_struct_clause(clause_tokens)?); } (Token::Plus, Token::Plus, _) | (Token::Minus, Token::Minus, _) => { - clauses.push(parse_increment_clause(clause)?); + clauses.push(parse_increment_clause(clause_tokens)?); } (Token::Name(_), Token::EqualsSign | Token::Dot | Token::OpenSquareBracket, _) => { - clauses.extend(parse_set_clause(clause)?); + clauses.extend(parse_set_clause(clause_tokens)?); } (Token::Drain, _, _) => { - clauses.push(parse_drain_copy_clause(clause, true)?); + clauses.push(parse_drain_copy_clause(clause_tokens, true)?); } (Token::Copy, _, _) => { - clauses.push(parse_drain_copy_clause(clause, false)?); + clauses.push(parse_drain_copy_clause(clause_tokens, false)?); } (Token::While, _, _) => { - clauses.push(parse_while_clause(clause)?); + clauses.push(parse_while_clause(clause_tokens)?); } (Token::Output, _, _) => { - clauses.push(parse_output_clause(clause)?); + clauses.push(parse_output_clause(clause_tokens)?); } (Token::Input, _, _) => { - clauses.push(parse_input_clause(clause)?); + clauses.push(parse_input_clause(clause_tokens)?); } (Token::Name(_), Token::OpenParenthesis, _) => { - clauses.push(parse_function_call_clause(clause)?); + clauses.push(parse_function_call_clause(clause_tokens)?); } (Token::Fn, _, _) => { - clauses.push(parse_function_definition_clause(clause)?); + clauses.push(parse_function_definition_clause(clause_tokens)?); } (Token::Name(_), Token::Plus | Token::Minus, Token::EqualsSign) => { 
- clauses.extend(parse_add_clause(clause)?); + clauses.extend(parse_add_clause(clause_tokens)?); } (Token::If, _, _) => { - clauses.push(parse_if_else_clause(clause)?); + clauses.push(parse_if_else_clause(clause_tokens)?); } (Token::OpenBrace, _, _) => { - let braced_tokens = get_braced_tokens(clause, BRACES)?; + let braced_tokens = get_braced_tokens(clause_tokens, BRACES)?; let inner_clauses = parse(braced_tokens)?; clauses.push(Clause::Block(inner_clauses)); } (Token::Bf, _, _) => { - clauses.push(parse_brainfuck_clause(clause)?); + clauses.push(parse_brainfuck_clause(clause_tokens)?); } - (Token::Assert, _, _) => clauses.push(parse_assert_clause(clause)?), + (Token::Assert, _, _) => clauses.push(parse_assert_clause(clause_tokens)?), // empty clause (Token::Semicolon, _, _) => (), // the None token usually represents whitespace, it should be filtered out before reaching this function @@ -102,9 +102,9 @@ pub fn parse(tokens: &[Token]) -> Result>, String> { | Token::UpToken, _, _, - ) => r_panic!("Invalid clause: {clause:#?}"), + ) => r_panic!("Invalid clause: {clause_tokens:#?}"), }; - i += clause.len(); + i += clause_tokens.len(); } Ok(clauses) @@ -866,9 +866,9 @@ fn parse_array_length(tokens: &[Token]) -> Result<(usize, usize), String> { Ok((len, i)) } -// get a clause, typically a line, bounded by ; +/// get a clause's tokens, typically a line, bounded by ; fn get_clause_tokens(tokens: &[Token]) -> Result, String> { - if tokens.len() < 2 { + if tokens.len() == 0 { Ok(None) } else { let mut i = 0usize; @@ -1192,6 +1192,7 @@ impl Expression { // TODO: add multiplication // yes, but no variable * variable multiplication or division #[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] pub enum Expression { SumExpression { sign: Sign, @@ -1204,6 +1205,7 @@ pub enum Expression { } #[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] pub enum Sign { Positive, Negative, @@ -1218,6 +1220,7 @@ impl Sign { } #[derive(Debug, Clone)] +#[cfg_attr(test, 
derive(PartialEq))] pub enum Clause { DeclareVariable { var: VariableDefinition, @@ -1287,6 +1290,7 @@ pub enum Clause { // extended brainfuck opcodes to include mastermind code blocks #[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] pub enum ExtendedOpcode { Add, Subtract, @@ -1303,7 +1307,6 @@ pub enum ExtendedOpcode { #[derive(Debug, Clone, Hash, PartialEq, Eq)] /// the type of a variable according to the user, not validated yet as the parser does not keep track of types -// maybe it should keep track of types? pub enum VariableTypeReference { Cell, Struct(String), @@ -1316,7 +1319,7 @@ pub enum LocationSpecifier { Cell(TapeCell), Variable(VariableTarget), } -impl LocationSpecifier { +impl LocationSpecifier { fn is_none(&self) -> bool { matches!(self, LocationSpecifier::None) } @@ -1425,3 +1428,45 @@ impl Display for VariableTarget { Ok(()) } } + +#[cfg(test)] +mod parser_tests { + use super::*; + + #[test] + fn parse_if_1() { + assert!(parse(&[ + // if true {{}} + Token::If, + Token::True, + Token::OpenBrace, + Token::OpenBrace, + Token::ClosingBrace, + Token::ClosingBrace, + ]) + .unwrap() + .iter() + .eq(&[Clause::IfElse { + condition: Expression::NaturalNumber(1), + if_block: Some(vec![Clause::::Block(vec![])]), + else_block: None, + }])); + } + + #[test] + fn end_tokens_1() { + let _ = parse(&[Token::Clobbers]).expect_err(""); + } + + #[test] + fn end_tokens_2() { + let _ = parse(&[Token::Semicolon]).unwrap(); + let _ = parse(&[Token::Semicolon, Token::Semicolon]).unwrap(); + let _ = parse(&[Token::Semicolon, Token::Semicolon, Token::Semicolon]).unwrap(); + } + + #[test] + fn end_tokens_3() { + let _ = parse(&[Token::Cell, Token::Semicolon]).expect_err(""); + } +} From e468fb830287fbe34842b7cbb2c86f034b2a2059 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Wed, 15 Oct 2025 16:16:06 +1100 Subject: [PATCH 10/56] Make parser generic for tape cells --- compiler/src/backend.rs | 12 +- compiler/src/brainfuck.rs | 110 ++++++------ 
compiler/src/brainfuck_optimiser.rs | 5 +- compiler/src/cells.rs | 20 +++ compiler/src/constants_optimiser.rs | 5 +- compiler/src/frontend.rs | 3 +- compiler/src/lib.rs | 21 +-- compiler/src/main.rs | 30 +++- compiler/src/parser.rs | 262 ++++++++++++++++++++-------- compiler/src/tests.rs | 6 +- compiler/src/tokeniser.rs | 4 +- 11 files changed, 308 insertions(+), 170 deletions(-) create mode 100644 compiler/src/cells.rs diff --git a/compiler/src/backend.rs b/compiler/src/backend.rs index b83b599..f190039 100644 --- a/compiler/src/backend.rs +++ b/compiler/src/backend.rs @@ -7,11 +7,11 @@ use std::{ collections::{HashMap, HashSet}, - fmt::Display, num::Wrapping, }; use crate::{ + cells::TapeCell2D, constants_optimiser::calculate_optimal_addition, frontend::{CellLocation, Instruction, MemoryId}, macros::macros::{r_assert, r_panic}, @@ -21,16 +21,6 @@ use crate::{ type LoopDepth = usize; type TapeValue = u8; -#[derive(PartialEq, Clone, Hash, Eq, Copy, Debug)] -pub struct TapeCell2D(pub i32, pub i32); - -impl Display for TapeCell2D { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("({},{})", self.0, self.1))?; - Ok(()) - } -} - impl MastermindContext { pub fn ir_to_bf( &self, diff --git a/compiler/src/brainfuck.rs b/compiler/src/brainfuck.rs index f62cd20..c806fa3 100644 --- a/compiler/src/brainfuck.rs +++ b/compiler/src/brainfuck.rs @@ -3,12 +3,11 @@ use std::{ collections::HashMap, - fmt, io::{Read, Write}, num::Wrapping, }; -use crate::{backend::TapeCell2D, macros::macros::r_panic}; +use crate::{cells::TapeCell2D, macros::macros::r_panic}; use wasm_bindgen::{JsCast, JsValue}; use wasm_bindgen_futures::JsFuture; @@ -57,15 +56,13 @@ impl Tape { } } -pub struct BVMConfig { +pub struct BrainfuckConfig { pub enable_debug_symbols: bool, pub enable_2d_grid: bool, } -pub struct BVM { - config: BVMConfig, - tape: Tape, - program: Vec, +pub struct BrainfuckContext { + pub config: BrainfuckConfig, } pub trait AsyncByteReader { 
@@ -76,39 +73,33 @@ pub trait ByteWriter { fn write_byte(&mut self, byte: u8); } -impl BVM { +impl BrainfuckContext { const MAX_STEPS_DEFAULT: usize = (2 << 30) - 2; - pub fn new(config: BVMConfig, program: Vec) -> Self { - BVM { - config, - tape: Tape::new(), - program, - } - } - // TODO: refactor/rewrite this, can definitely be improved with async read/write traits or similar // I don't love that I duplicated this to make it work with js // TODO: this isn't covered by unit tests pub async fn run_async( - mut self, + &self, + program: Vec, output_callback: &js_sys::Function, input_callback: &js_sys::Function, ) -> Result { + let mut tape = Tape::new(); let mut pc: usize = 0; // this could be more efficient with a pre-computed map let mut loop_stack: Vec = Vec::new(); let mut output_bytes: Vec = Vec::new(); - while pc < self.program.len() { + while pc < program.len() { match ( - self.program[pc], + program[pc], self.config.enable_debug_symbols, self.config.enable_2d_grid, ) { - ('+', _, _) => self.tape.increment_current_cell(Wrapping(1)), - ('-', _, _) => self.tape.increment_current_cell(Wrapping(-1i8 as u8)), + ('+', _, _) => tape.increment_current_cell(Wrapping(1)), + ('-', _, _) => tape.increment_current_cell(Wrapping(-1i8 as u8)), (',', _, _) => { // TODO: handle errors let jsval = input_callback @@ -125,11 +116,11 @@ impl BVM { .as_f64() .expect("Could not convert js number into f64 type"); let byte: u8 = num as u8; // I have no idea if this works (TODO: test) - self.tape.set_current_cell(Wrapping(byte)); + tape.set_current_cell(Wrapping(byte)); } ('.', _, _) => { // TODO: handle errors - let byte = self.tape.get_current_cell().0; + let byte = tape.get_current_cell().0; let fnum: f64 = byte as f64; // I have no idea if this works (TODO: test again) output_callback .call1(&JsValue::null(), &JsValue::from_f64(fnum)) @@ -138,20 +129,20 @@ impl BVM { output_bytes.push(byte); } ('>', _, _) => { - self.tape.move_head_position(TapeCell2D(1, 0)); + 
tape.move_head_position(TapeCell2D(1, 0)); } ('<', _, _) => { - self.tape.move_head_position(TapeCell2D(-1, 0)); + tape.move_head_position(TapeCell2D(-1, 0)); } ('[', _, _) => { // entering a loop - if self.tape.get_current_cell().0 == 0 { + if tape.get_current_cell().0 == 0 { // skip the loop, (advance to the corresponding closing loop brace) // TODO: make this more efficient by pre-computing a loops map let mut loop_count = 1; while loop_count > 0 { pc += 1; - loop_count += match self.program[pc] { + loop_count += match program[pc] { '[' => 1, ']' => -1, _ => 0, @@ -163,7 +154,7 @@ impl BVM { } } (']', _, _) => { - if self.tape.get_current_cell().0 == 0 { + if tape.get_current_cell().0 == 0 { // exit the loop loop_stack.pop(); } else { @@ -172,8 +163,8 @@ impl BVM { pc = loop_stack[loop_stack.len() - 1]; } } - ('^', _, true) => self.tape.move_head_position(TapeCell2D(0, 1)), - ('v', _, true) => self.tape.move_head_position(TapeCell2D(0, -1)), + ('^', _, true) => tape.move_head_position(TapeCell2D(0, 1)), + ('v', _, true) => tape.move_head_position(TapeCell2D(0, -1)), ('^', _, false) => { r_panic!("2D Brainfuck currently disabled"); } @@ -181,10 +172,10 @@ impl BVM { r_panic!("2D Brainfuck currently disabled"); } // ('#', true, ) => { - // println!("{}", self.tape); + // println!("{}", tape); // } // ('@', true, _) => { - // print!("{}", self.tape.get_current_cell().0 as i32); + // print!("{}", tape.get_current_cell().0 as i32); // } _ => (), }; @@ -194,7 +185,7 @@ impl BVM { // .iter() // .collect(); // println!("{s}"); - // println!("{}", self.tape); + // println!("{}", tape); pc += 1; } @@ -202,48 +193,50 @@ impl BVM { } pub fn run( - mut self, + &self, + program: Vec, input: &mut impl Read, output: &mut impl Write, max_steps: Option, ) -> Result<(), String> { + let mut tape = Tape::new(); let mut steps = 0usize; let mut pc: usize = 0; // this could be more efficient with a pre-computed map let mut loop_stack: Vec = Vec::new(); - while pc < self.program.len() { 
+ while pc < program.len() { match ( - self.program[pc], + program[pc], self.config.enable_debug_symbols, self.config.enable_2d_grid, ) { - ('+', _, _) => self.tape.increment_current_cell(Wrapping(1)), - ('-', _, _) => self.tape.increment_current_cell(Wrapping(-1i8 as u8)), + ('+', _, _) => tape.increment_current_cell(Wrapping(1)), + ('-', _, _) => tape.increment_current_cell(Wrapping(-1i8 as u8)), (',', _, _) => { let mut buf = [0; 1]; let _ = input.read_exact(&mut buf); - self.tape.set_current_cell(Wrapping(buf[0])); + tape.set_current_cell(Wrapping(buf[0])); } ('.', _, _) => { - let buf = [self.tape.get_current_cell().0]; + let buf = [tape.get_current_cell().0]; let _ = output.write(&buf); } ('>', _, _) => { - self.tape.move_head_position(TapeCell2D(1, 0)); + tape.move_head_position(TapeCell2D(1, 0)); } ('<', _, _) => { - self.tape.move_head_position(TapeCell2D(-1, 0)); + tape.move_head_position(TapeCell2D(-1, 0)); } ('[', _, _) => { // entering a loop - if self.tape.get_current_cell().0 == 0 { + if tape.get_current_cell().0 == 0 { // skip the loop, (advance to the corresponding closing loop brace) // TODO: make this more efficient by pre-computing a loops map let mut loop_count = 1; while loop_count > 0 { pc += 1; - loop_count += match self.program[pc] { + loop_count += match program[pc] { '[' => 1, ']' => -1, _ => 0, @@ -255,7 +248,7 @@ impl BVM { } } (']', _, _) => { - if self.tape.get_current_cell().0 == 0 { + if tape.get_current_cell().0 == 0 { // exit the loop loop_stack.pop(); } else { @@ -264,8 +257,8 @@ impl BVM { pc = loop_stack[loop_stack.len() - 1]; } } - ('^', _, true) => self.tape.move_head_position(TapeCell2D(0, 1)), - ('v', _, true) => self.tape.move_head_position(TapeCell2D(0, -1)), + ('^', _, true) => tape.move_head_position(TapeCell2D(0, 1)), + ('v', _, true) => tape.move_head_position(TapeCell2D(0, -1)), ('^', _, false) => { r_panic!("2D Brainfuck currently disabled"); } @@ -273,10 +266,10 @@ impl BVM { r_panic!("2D Brainfuck currently 
disabled"); } // '#' => { - // println!("{}", self.tape); + // println!("{}", tape); // } // '@' => { - // print!("{}", self.tape.get_current_cell().0 as i32); + // print!("{}", tape.get_current_cell().0 as i32); // } _ => (), }; @@ -286,7 +279,7 @@ impl BVM { // .iter() // .collect(); // println!("{s}"); - // println!("{}", self.tape); + // println!("{}", tape); pc += 1; // cut the program short if it runs forever @@ -311,28 +304,33 @@ pub mod bvm_tests { use std::io::Cursor; pub fn run_code( - config: BVMConfig, + config: BrainfuckConfig, code: String, input: String, max_steps_cutoff: Option, ) -> String { - let mut bvm = BVM::new(config, code.chars().collect()); + let ctx = BrainfuckContext { config }; let input_bytes: Vec = input.bytes().collect(); let mut input_stream = Cursor::new(input_bytes); let mut output_stream = Cursor::new(Vec::new()); - bvm.run(&mut input_stream, &mut output_stream, max_steps_cutoff) - .unwrap(); + ctx.run( + code.chars().collect(), + &mut input_stream, + &mut output_stream, + max_steps_cutoff, + ) + .unwrap(); // TODO: fix this unsafe stuff unsafe { String::from_utf8_unchecked(output_stream.into_inner()) } } - const BVM_CONFIG_1D: BVMConfig = BVMConfig { + const BVM_CONFIG_1D: BrainfuckConfig = BrainfuckConfig { enable_debug_symbols: false, enable_2d_grid: false, }; - const BVM_CONFIG_2D: BVMConfig = BVMConfig { + const BVM_CONFIG_2D: BrainfuckConfig = BrainfuckConfig { enable_debug_symbols: false, enable_2d_grid: true, }; diff --git a/compiler/src/brainfuck_optimiser.rs b/compiler/src/brainfuck_optimiser.rs index af7f57f..a8171b8 100644 --- a/compiler/src/brainfuck_optimiser.rs +++ b/compiler/src/brainfuck_optimiser.rs @@ -1,10 +1,7 @@ use itertools::Itertools; use std::{collections::HashMap, num::Wrapping}; -use crate::{ - backend::{Opcode2D, TapeCell2D}, - misc::MastermindContext, -}; +use crate::{backend::Opcode2D, cells::TapeCell2D, misc::MastermindContext}; // originally trivial post-compilation brainfuck optimisations // 
extended to 2D which makes it more difficult diff --git a/compiler/src/cells.rs b/compiler/src/cells.rs new file mode 100644 index 0000000..e240841 --- /dev/null +++ b/compiler/src/cells.rs @@ -0,0 +1,20 @@ +use std::fmt::Display; + +#[derive(Debug, PartialEq)] +pub struct TapeCell(pub i32); +#[derive(PartialEq, Clone, Hash, Eq, Copy, Debug)] +pub struct TapeCell2D(pub i32, pub i32); + +impl Display for TapeCell { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}", self.0))?; + Ok(()) + } +} + +impl Display for TapeCell2D { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("({},{})", self.0, self.1))?; + Ok(()) + } +} diff --git a/compiler/src/constants_optimiser.rs b/compiler/src/constants_optimiser.rs index fc43caf..e695620 100644 --- a/compiler/src/constants_optimiser.rs +++ b/compiler/src/constants_optimiser.rs @@ -1,5 +1,8 @@ // TODO: make unit tests for this -use crate::backend::{BFBuilder, Opcode2D, TapeCell2D}; +use crate::{ + backend::{BFBuilder, Opcode2D}, + cells::TapeCell2D, +}; // basically, most ascii characters are large numbers, which are more efficient to calculate with multiplication than with a bunch of + or - // an optimising brainfuck runtime will prefer a long string of +++++ or ----- however the goal of mastermind is to be used for code golf, which is not about speed diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index e9d2901..c445102 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -3,7 +3,8 @@ use std::{collections::HashMap, iter::zip}; use crate::{ - backend::{Opcode2D, TapeCell2D}, + backend::Opcode2D, + cells::TapeCell2D, macros::macros::{r_assert, r_panic}, misc::MastermindContext, parser::{ diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 6f4b3f0..3d4a577 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -6,6 +6,7 @@ mod macros; mod backend; mod brainfuck; mod 
brainfuck_optimiser; +mod cells; mod constants_optimiser; mod frontend; mod misc; @@ -16,17 +17,15 @@ mod tokeniser; mod tests; use backend::BrainfuckOpcodes; -use brainfuck::{BVMConfig, BVM}; +use brainfuck::{BrainfuckConfig, BrainfuckContext}; +use misc::MastermindContext; use parser::parse; use preprocessor::preprocess_from_memory; use tokeniser::tokenise; use std::collections::HashMap; - use wasm_bindgen::{prelude::wasm_bindgen, JsValue}; -use crate::misc::MastermindContext; - pub fn set_panic_hook() { // copied from rustwasm.github.io // https://github.com/rustwasm/console_error_panic_hook @@ -69,14 +68,16 @@ pub async fn wasm_run_bf( ) -> Result { set_panic_hook(); - let config = BVMConfig { - enable_debug_symbols: false, - enable_2d_grid: enable_2d_grid, + let ctx = BrainfuckContext { + config: BrainfuckConfig { + enable_debug_symbols: false, + enable_2d_grid: enable_2d_grid, + }, }; - let mut bf = BVM::new(config, code.chars().collect()); - // hack, TODO: refactor - let r = bf.run_async(output_callback, input_callback).await?; + let r = ctx + .run_async(code.chars().collect(), output_callback, input_callback) + .await?; Ok(r) } diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 3ac1af6..0c276ba 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -5,6 +5,7 @@ mod macros; mod backend; mod brainfuck; mod brainfuck_optimiser; +mod cells; mod constants_optimiser; mod frontend; mod parser; @@ -15,7 +16,6 @@ mod misc; mod tests; use backend::BrainfuckOpcodes; -use brainfuck::{BVMConfig, BVM}; use misc::MastermindConfig; use parser::parse; use preprocessor::preprocess; @@ -25,7 +25,10 @@ use std::io::{stdin, stdout, Cursor}; use clap::Parser; -use crate::misc::MastermindContext; +use crate::{ + brainfuck::{BrainfuckConfig, BrainfuckContext}, + misc::MastermindContext, +}; #[derive(Parser, Default, Debug)] #[command(author = "Heathcorp", version = "0.1", about = "Mastermind: the Brainfuck interpreter and compilation tool", long_about = None)] 
@@ -116,16 +119,27 @@ fn main() -> Result<(), String> { if args.run || !args.compile { // run brainfuck - let config = BVMConfig { - enable_debug_symbols: false, - enable_2d_grid: false, + let ctx = BrainfuckContext { + config: BrainfuckConfig { + enable_debug_symbols: false, + enable_2d_grid: false, + }, }; - let mut bvm = BVM::new(config, bf_program.chars().collect()); if args.input.is_some() { - bvm.run(&mut Cursor::new(args.input.unwrap()), &mut stdout(), None)?; + ctx.run( + bf_program.chars().collect(), + &mut Cursor::new(args.input.unwrap()), + &mut stdout(), + None, + )?; } else { - bvm.run(&mut stdin(), &mut stdout(), None)?; + ctx.run( + bf_program.chars().collect(), + &mut stdin(), + &mut stdout(), + None, + )?; } } else { print!("{bf_program}"); diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index 2d226db..c5f942c 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -1,12 +1,12 @@ use crate::{ - backend::TapeCell2D, + cells::{TapeCell, TapeCell2D}, macros::macros::{r_assert, r_panic}, tokeniser::Token, }; use std::{fmt::Display, mem::discriminant, num::Wrapping}; /// recursive function to create a tree representation of the program -pub fn parse(tokens: &[Token]) -> Result>, String> { +pub fn parse(tokens: &[Token]) -> Result>, String> { // basic steps: // chew off tokens from the front, recursively parse blocks of tokens let mut clauses = Vec::new(); @@ -99,7 +99,7 @@ pub fn parse(tokens: &[Token]) -> Result>, String> { | Token::Dot | Token::At | Token::Struct - | Token::UpToken, + | Token::Caret, _, _, ) => r_panic!("Invalid clause: {clause_tokens:#?}"), @@ -110,7 +110,7 @@ pub fn parse(tokens: &[Token]) -> Result>, String> { Ok(clauses) } -fn parse_let_clause(clause: &[Token]) -> Result, String> { +fn parse_let_clause(clause: &[Token]) -> Result, String> { // cell x = 0; // struct DummyStruct y let mut i = 0usize; @@ -134,7 +134,7 @@ fn parse_let_clause(clause: &[Token]) -> Result, String> { } /// Parse tokens 
representing a struct definition into a clause -fn parse_struct_clause(clause: &[Token]) -> Result, String> { +fn parse_struct_clause(clause: &[Token]) -> Result, String> { let mut i = 0usize; let Token::Struct = &clause[i] else { r_panic!("Expected struct keyword in struct clause. This should never occur. {clause:#?}"); @@ -182,8 +182,8 @@ fn parse_struct_clause(clause: &[Token]) -> Result, String> { }) } -fn parse_add_clause(clause: &[Token]) -> Result>, String> { - let mut clauses: Vec> = Vec::new(); +fn parse_add_clause(clause: &[Token]) -> Result>, String> { + let mut clauses: Vec> = Vec::new(); let mut i = 0usize; let (var, len) = parse_var_target(&clause[i..])?; i += len; @@ -217,7 +217,7 @@ fn parse_add_clause(clause: &[Token]) -> Result>, String> } // currently just syntax sugar, should make it actually do post/pre increments -fn parse_increment_clause(clause: &[Token]) -> Result, String> { +fn parse_increment_clause(clause: &[Token]) -> Result, String> { let (var, _) = parse_var_target(&clause[2..])?; //An increment clause can never be self referencing since it just VAR++ Ok(match (&clause[0], &clause[1]) { @@ -238,9 +238,9 @@ fn parse_increment_clause(clause: &[Token]) -> Result, String // assumed that the final token is a semicolon } -fn parse_set_clause(clause: &[Token]) -> Result>, String> { +fn parse_set_clause(clause: &[Token]) -> Result>, String> { // TODO: what do we do about arrays and strings and structs? 
- let mut clauses: Vec> = Vec::new(); + let mut clauses: Vec> = Vec::new(); let mut i = 0usize; let (var, len) = parse_var_target(&clause[i..])?; i += len; @@ -287,17 +287,17 @@ fn parse_set_clause(clause: &[Token]) -> Result>, String> Ok(clauses) } -fn parse_drain_copy_clause( +fn parse_drain_copy_clause( clause: &[Token], is_draining: bool, -) -> Result, String> { +) -> Result, String> { // drain g {i += 1;}; // drain g into j; // copy foo into bar {g += 2; etc;}; // TODO: make a tuple-parsing function and use it here instead of a space seperated list of targets let mut targets = Vec::new(); - let mut block: Vec> = Vec::new(); + let mut block = Vec::new(); let mut i = 1usize; let condition_start_token = i; @@ -353,17 +353,11 @@ fn parse_drain_copy_clause( }) } -fn parse_while_clause(clause: &[Token]) -> Result, String> { +fn parse_while_clause(clause: &[Token]) -> Result, String> { // TODO: make this able to accept expressions let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; i += len; - // loop { - // if let Token::OpenBrace = &clause[i] { - // break; - // }; - // i += 1; - // } // let expr = parse_expression(&clause[1..i]); let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; @@ -375,7 +369,7 @@ fn parse_while_clause(clause: &[Token]) -> Result, String> { }) } -fn parse_if_else_clause(clause: &[Token]) -> Result, String> { +fn parse_if_else_clause(clause: &[Token]) -> Result, String> { // skip first token, assumed to start with if let mut i = 1usize; let mut not = false; @@ -400,13 +394,13 @@ fn parse_if_else_clause(clause: &[Token]) -> Result, String> let condition = Expression::parse(&clause[condition_start_token..i])?; - let block_one: Vec> = { + let block_one = { let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; i += 2 + block_tokens.len(); parse(block_tokens)? 
}; - let block_two: Option>> = if let Some(Token::Else) = &clause.get(i) { + let block_two = if let Some(Token::Else) = &clause.get(i) { i += 1; let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; // i += 2 + block_tokens.len(); @@ -429,7 +423,7 @@ fn parse_if_else_clause(clause: &[Token]) -> Result, String> }) } -fn parse_output_clause(clause: &[Token]) -> Result, String> { +fn parse_output_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let expr_tokens = &clause[i..(clause.len() - 1)]; @@ -443,7 +437,7 @@ fn parse_output_clause(clause: &[Token]) -> Result, String> { Ok(Clause::OutputValue { value: expr }) } -fn parse_input_clause(clause: &[Token]) -> Result, String> { +fn parse_input_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -456,7 +450,7 @@ fn parse_input_clause(clause: &[Token]) -> Result, String> { Ok(Clause::InputVariable { var }) } -fn parse_assert_clause(clause: &[Token]) -> Result, String> { +fn parse_assert_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -484,19 +478,75 @@ fn parse_assert_clause(clause: &[Token]) -> Result, String> { } } -// parse any memory location specifiers -// let g @4,2 = 68; -// or -// let p @3 = 68; -fn parse_location_specifier( - tokens: &[Token], -) -> Result<(LocationSpecifier, usize), String> { - if tokens.len() == 0 { - return Ok((LocationSpecifier::None, 0)); - } - if let Token::At = &tokens[0] { +pub trait TapeCellLocation +where + Self: Sized, +{ + // parse any memory location specifiers + // let g @(4,2) = 68; + // or + // let p @3 = 68; + fn parse_location_specifier( + tokens: &[Token], + ) -> Result<(LocationSpecifier, usize), String>; +} + +impl TapeCellLocation for TapeCell { + fn parse_location_specifier( + tokens: &[Token], + ) -> Result<(LocationSpecifier, usize), String> { + if tokens.len() == 0 { + return Ok((LocationSpecifier::None, 0)); + 
} + let Token::At = &tokens[0] else { + return Ok((LocationSpecifier::None, 0)); + }; + let mut i = 1; + match &tokens[i] { + Token::Minus => { + let mut positive = true; + if let Token::Minus = &tokens[i] { + i += 1; + positive = false; + } + + let Token::Digits(digits) = &tokens[i] else { + r_panic!("Expected number after \"-\" in location specifier: {tokens:#?}"); + }; + i += 1; + + // TODO: error handling + let offset = digits.parse::().unwrap(); + Ok(( + LocationSpecifier::Cell(TapeCell(if positive { offset } else { -offset })), + i, + )) + } + Token::Name(_) => { + // variable location specifier + let (var, len) = parse_var_target(&tokens[i..])?; + i += len; + Ok((LocationSpecifier::Variable(var), i)) + } + _ => r_panic!("Invalid location specifier: {tokens:#?}"), + } + } +} + +impl TapeCellLocation for TapeCell2D { + fn parse_location_specifier( + tokens: &[Token], + ) -> Result<(LocationSpecifier, usize), String> { + if tokens.len() == 0 { + return Ok((LocationSpecifier::None, 0)); + } + let Token::At = &tokens[0] else { + return Ok((LocationSpecifier::None, 0)); + }; + + let mut i = 1; match &tokens[i] { Token::Digits(_) | Token::Minus => { let x_offset = { @@ -513,11 +563,12 @@ fn parse_location_specifier( i += 1; // TODO: error handling - let mut offset: i32 = raw.parse().unwrap(); - if !positive { - offset = -offset; + let offset = raw.parse::().unwrap(); + if positive { + offset + } else { + -offset } - offset }; let y_offset = { @@ -536,11 +587,12 @@ fn parse_location_specifier( i += 1; // TODO: error handling - let mut offset: i32 = raw.parse().unwrap(); - if !positive { - offset = -offset; + let offset = raw.parse::().unwrap(); + if positive { + offset + } else { + -offset } - offset } else { 0 } @@ -558,11 +610,9 @@ fn parse_location_specifier( _ => r_panic!("Expected constant or variable in location specifier: {tokens:#?}"), } } - - Ok((LocationSpecifier::None, 0)) } -fn parse_brainfuck_clause(clause: &[Token]) -> Result, String> { +fn 
parse_brainfuck_clause(clause: &[Token]) -> Result, String> { // bf {++--<><} // bf @3 {++--<><} // bf clobbers var1 var2 {++--<><} @@ -572,7 +622,7 @@ fn parse_brainfuck_clause(clause: &[Token]) -> Result, String let mut i = 1usize; // check for location specifier - let (mem_offset, len) = parse_location_specifier(&clause[i..])?; + let (mem_offset, len) = TC::parse_location_specifier(&clause[i..])?; i += len; if let Token::Clobbers = &clause[i] { @@ -604,7 +654,7 @@ fn parse_brainfuck_clause(clause: &[Token]) -> Result, String Token::Minus => ops.push(ExtendedOpcode::Subtract), Token::MoreThan => ops.push(ExtendedOpcode::Right), Token::LessThan => ops.push(ExtendedOpcode::Left), - Token::UpToken => ops.push(ExtendedOpcode::Up), + Token::Caret => ops.push(ExtendedOpcode::Up), Token::OpenSquareBracket => ops.push(ExtendedOpcode::OpenLoop), Token::ClosingSquareBracket => ops.push(ExtendedOpcode::CloseLoop), Token::Dot => ops.push(ExtendedOpcode::Output), @@ -638,7 +688,9 @@ fn parse_brainfuck_clause(clause: &[Token]) -> Result, String }) } -fn parse_function_definition_clause(clause: &[Token]) -> Result, String> { +fn parse_function_definition_clause( + clause: &[Token], +) -> Result, String> { let mut i = 1usize; // function name let Token::Name(name) = &clause[i] else { @@ -679,7 +731,7 @@ fn parse_function_definition_clause(clause: &[Token]) -> Result> = parse(block_tokens)?; + let parsed_block = parse(block_tokens)?; Ok(Clause::DefineFunction { name: name.clone(), @@ -688,7 +740,7 @@ fn parse_function_definition_clause(clause: &[Token]) -> Result Result, String> { +fn parse_function_call_clause(clause: &[Token]) -> Result, String> { let mut i = 0usize; // Okay I didn't know this rust syntax, could have used it all over the place let Token::Name(name) = &clause[i] else { @@ -786,10 +838,10 @@ fn parse_var_target(tokens: &[Token]) -> Result<(VariableTarget, usize), String> } /// convert tokens of a variable definition into data representation, e.g. 
`cell x`, `struct G g`, `cell[5] x_arr`, `struct H[100] hs` -fn parse_var_definition( +fn parse_var_definition( tokens: &[Token], allow_location: bool, -) -> Result<(VariableDefinition, usize), String> { +) -> Result<(VariableDefinition, usize), String> { let mut i = 0usize; let mut var_type = match &tokens[i] { Token::Cell => { @@ -825,7 +877,7 @@ fn parse_var_definition( }; i += 1; - let (location_specifier, len) = parse_location_specifier(&tokens[i..])?; + let (location_specifier, len) = TC::parse_location_specifier(&tokens[i..])?; r_assert!( location_specifier.is_none() || allow_location, @@ -1219,19 +1271,21 @@ impl Sign { } } +/// Clause type with TC (tape cell) as a type variable +/// TC can be changed to implement 2D brainfuck, or other modifications #[derive(Debug, Clone)] #[cfg_attr(test, derive(PartialEq))] -pub enum Clause { +pub enum Clause { DeclareVariable { - var: VariableDefinition, + var: VariableDefinition, }, DefineVariable { - var: VariableDefinition, + var: VariableDefinition, value: Expression, }, DefineStruct { name: String, - fields: Vec>, + fields: Vec>, }, AddToVariable { var: VariableTarget, @@ -1252,12 +1306,12 @@ pub enum Clause { CopyLoop { source: Expression, targets: Vec, - block: Vec>, + block: Vec>, is_draining: bool, }, WhileLoop { var: VariableTarget, - block: Vec>, + block: Vec>, }, OutputValue { value: Expression, @@ -1268,8 +1322,8 @@ pub enum Clause { DefineFunction { name: String, // TODO: fix the type here, as function definitions don't actually need location specifiers and therefore don't need a tape cell type - arguments: Vec>, - block: Vec>, + arguments: Vec>, + block: Vec>, }, CallFunction { function_name: String, @@ -1277,14 +1331,14 @@ pub enum Clause { }, IfElse { condition: Expression, - if_block: Option>>, - else_block: Option>>, + if_block: Option>>, + else_block: Option>>, }, - Block(Vec>), + Block(Vec>), InlineBrainfuck { - location_specifier: LocationSpecifier, + location_specifier: LocationSpecifier, 
clobbered_variables: Vec, - operations: Vec>, + operations: Vec>, }, } @@ -1431,6 +1485,8 @@ impl Display for VariableTarget { #[cfg(test)] mod parser_tests { + use crate::cells::TapeCell; + use super::*; #[test] @@ -1455,18 +1511,76 @@ mod parser_tests { #[test] fn end_tokens_1() { - let _ = parse(&[Token::Clobbers]).expect_err(""); + let _ = parse::(&[Token::Clobbers]).expect_err(""); } #[test] fn end_tokens_2() { - let _ = parse(&[Token::Semicolon]).unwrap(); - let _ = parse(&[Token::Semicolon, Token::Semicolon]).unwrap(); - let _ = parse(&[Token::Semicolon, Token::Semicolon, Token::Semicolon]).unwrap(); + let _ = parse::(&[Token::Semicolon]).unwrap(); + let _ = parse::(&[Token::Semicolon, Token::Semicolon]).unwrap(); + let _ = parse::(&[Token::Semicolon, Token::Semicolon, Token::Semicolon]).unwrap(); } #[test] fn end_tokens_3() { - let _ = parse(&[Token::Cell, Token::Semicolon]).expect_err(""); + let _ = parse::(&[Token::Cell, Token::Semicolon]).expect_err(""); + } + + #[test] + fn while_condition_1() { + assert!(parse::(&[ + Token::While, + Token::Name(String::from("x")), + Token::OpenBrace, + Token::OpenBrace, + Token::ClosingBrace, + Token::ClosingBrace, + ]) + .unwrap() + .iter() + .eq(&[Clause::WhileLoop { + var: VariableTarget { + name: String::from("x"), + subfields: None, + is_spread: false + }, + block: vec![Clause::Block(vec![])] + }])) + } + + #[test] + fn two_dimensional_1() { + let _ = parse::(&[ + Token::Cell, + Token::Name(String::from("x")), + Token::At, + Token::Digits(String::from("0")), + Token::Comma, + Token::Digits(String::from("1")), + Token::Semicolon, + ]) + .expect_err(""); + } + + #[test] + fn two_dimensional_2() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("x")), + Token::At, + Token::Digits(String::from("0")), + Token::Comma, + Token::Digits(String::from("1")), + Token::Semicolon, + ]) + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { + var: VariableDefinition { + name: String::from("x"), + var_type: 
VariableTypeReference::Cell, + location_specifier: LocationSpecifier::Cell(TapeCell2D(0, 1)) + } + }])); } } diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index 984978a..524c32f 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -5,7 +5,7 @@ pub mod black_box_tests { use crate::{ backend::{BrainfuckOpcodes, Opcode2D}, - brainfuck::{bvm_tests::run_code, BVMConfig}, + brainfuck::{bvm_tests::run_code, BrainfuckConfig}, misc::{MastermindConfig, MastermindContext}, parser::parse, tokeniser::{tokenise, Token}, @@ -76,12 +76,12 @@ pub mod black_box_tests { enable_2d_grid: false, }; - const BVM_CONFIG_1D: BVMConfig = BVMConfig { + const BVM_CONFIG_1D: BrainfuckConfig = BrainfuckConfig { enable_debug_symbols: false, enable_2d_grid: false, }; - const BVM_CONFIG_2D: BVMConfig = BVMConfig { + const BVM_CONFIG_2D: BrainfuckConfig = BrainfuckConfig { enable_debug_symbols: false, enable_2d_grid: true, }; diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index 68e926a..1f4c819 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -41,7 +41,7 @@ pub fn tokenise(source: &String) -> Result, String> { (")", Token::ClosingParenthesis), ("<", Token::LessThan), (">", Token::MoreThan), - ("^", Token::UpToken), + ("^", Token::Caret), ("true", Token::True), ("false", Token::False), (",", Token::Comma), @@ -228,7 +228,7 @@ pub enum Token { Plus, EqualsSign, Semicolon, - UpToken, + Caret, } #[cfg(test)] From fee58206b6f6b8b8f891f14157d338c56ee2dfdc Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Thu, 16 Oct 2025 18:38:11 +1100 Subject: [PATCH 11/56] Make compiler frontend generic and add TapeCellVariant trait --- compiler/src/backend.rs | 41 +-- compiler/src/brainfuck.rs | 11 +- compiler/src/cells.rs | 50 ++- compiler/src/constants_optimiser.rs | 8 +- compiler/src/frontend.rs | 363 +++++++++++----------- compiler/src/lib.rs | 40 ++- compiler/src/main.rs | 51 ++- compiler/src/parser.rs | 99 ++++-- compiler/src/tests.rs | 464 
+++++++++++++++------------- compiler/src/tokeniser.rs | 8 +- 10 files changed, 637 insertions(+), 498 deletions(-) diff --git a/compiler/src/backend.rs b/compiler/src/backend.rs index f190039..a21eda7 100644 --- a/compiler/src/backend.rs +++ b/compiler/src/backend.rs @@ -22,10 +22,10 @@ type LoopDepth = usize; type TapeValue = u8; impl MastermindContext { - pub fn ir_to_bf( + pub fn ir_to_bf + PartialEq>( &self, - instructions: Vec>, - return_to_cell: Option, + instructions: Vec>, + return_to_cell: Option, ) -> Result, String> { let mut allocator = CellAllocator::new(); let mut alloc_map: HashMap< @@ -36,7 +36,7 @@ impl MastermindContext { let mut loop_stack: Vec = Vec::new(); let mut current_loop_depth: LoopDepth = 0; let mut skipped_loop_depth: Option = None; - let mut ops = BFBuilder::new(); + let mut ops = BFBuilder2D::new(); for instruction in instructions { if let Some(depth) = skipped_loop_depth { @@ -60,7 +60,7 @@ impl MastermindContext { // however they will absolutely not be very efficient if used directly as cell locations Instruction::Allocate(memory, location_specifier) => { let cell = allocator.allocate( - location_specifier, + location_specifier.map(|c| c.into()), memory.len(), self.config.memory_allocation_method, )?; @@ -120,7 +120,7 @@ outside of loop it was allocated" ); }; - allocator.free(cell, size)?; + allocator.free(cell.into(), size)?; } Instruction::OpenLoop(cell_obj) => { let Some((cell_base, size, alloc_loop_depth, known_values)) = @@ -217,7 +217,7 @@ outside of loop it was allocated" // TODO: instead find the nearest zero cell, doesn't matter if allocated or not let temp_cell = allocator.allocate_temp_cell(cell); - let optimised_ops: BFBuilder = + let optimised_ops: BFBuilder2D = calculate_optimal_addition(imm as i8, ops.head_pos, cell, temp_cell); ops.head_pos = optimised_ops.head_pos; @@ -329,7 +329,7 @@ outside of loop it was allocated" Instruction::InsertBrainfuckAtCell(operations, location_specifier) => { // move to the correct 
cell, based on the location specifier match location_specifier { - CellLocation::FixedCell(cell) => ops.move_to_cell(cell), + CellLocation::FixedCell(cell) => ops.move_to_cell(cell.into()), CellLocation::MemoryCell(cell_obj) => { let Some((cell_base, size, _alloc_loop_depth, _known_values)) = alloc_map.get(&cell_obj.memory_id) @@ -355,21 +355,21 @@ outside of loop it was allocated" // this is used in embedded brainfuck contexts to preserve head position if let Some(origin_cell) = return_to_cell { - ops.move_to_cell(origin_cell); + ops.move_to_cell(origin_cell.into()); } Ok(ops.opcodes) } } -struct CellAllocator { - alloc_map: HashSet, +struct CellAllocator { + alloc_map: HashSet, } // allocator will not automatically allocate negative-index cells // but users can -impl CellAllocator { - fn new() -> CellAllocator { +impl CellAllocator { + fn new() -> CellAllocator { CellAllocator { alloc_map: HashSet::new(), } @@ -611,7 +611,7 @@ pub enum Opcode2D { Down, } -pub struct BFBuilder { +pub struct BFBuilder2D { opcodes: Vec, pub head_pos: TapeCell2D, } @@ -672,22 +672,22 @@ impl BrainfuckOpcodes for Vec { } } -impl BrainfuckOpcodes for BFBuilder { +impl BrainfuckOpcodes for BFBuilder2D { fn to_string(self) -> String { self.opcodes.to_string() } fn from_str(s: &str) -> Self { - BFBuilder { + BFBuilder2D { opcodes: Vec::from_str(s), head_pos: TapeCell2D(0, 0), } } } -impl BFBuilder { - pub fn new() -> BFBuilder { - BFBuilder { +impl BFBuilder2D { + pub fn new() -> BFBuilder2D { + BFBuilder2D { opcodes: Vec::new(), head_pos: TapeCell2D(0, 0), } @@ -747,3 +747,6 @@ impl BFBuilder { } } } + +#[cfg(test)] +mod backend_tests {} diff --git a/compiler/src/brainfuck.rs b/compiler/src/brainfuck.rs index c806fa3..977756e 100644 --- a/compiler/src/brainfuck.rs +++ b/compiler/src/brainfuck.rs @@ -7,13 +7,16 @@ use std::{ num::Wrapping, }; -use crate::{cells::TapeCell2D, macros::macros::r_panic}; +use crate::{ + cells::{TapeCell2D, TapeCellVariant}, + macros::macros::r_panic, +}; use 
wasm_bindgen::{JsCast, JsValue}; use wasm_bindgen_futures::JsFuture; -struct Tape { - memory_map: HashMap>, - head_position: TapeCell, +struct Tape { + memory_map: HashMap>, + head_position: TC, } impl Tape { diff --git a/compiler/src/cells.rs b/compiler/src/cells.rs index e240841..3aa5ef0 100644 --- a/compiler/src/cells.rs +++ b/compiler/src/cells.rs @@ -1,10 +1,24 @@ use std::fmt::Display; -#[derive(Debug, PartialEq)] +use crate::{parser::LocationSpecifier, tokeniser::Token}; + +/// when making Brainfuck variants, for a cell location type, you must implement this trait +/// for now this is implemented by TapeCell (1D location specifier), and TapeCell2D (2D) +pub trait TapeCellVariant +where + Self: PartialEq + Copy + Clone + Eq + TapeOrigin + TapeCellLocation, +{ +} + +#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy)] pub struct TapeCell(pub i32); -#[derive(PartialEq, Clone, Hash, Eq, Copy, Debug)] + +#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy)] pub struct TapeCell2D(pub i32, pub i32); +impl TapeCellVariant for TapeCell {} +impl TapeCellVariant for TapeCell2D {} + impl Display for TapeCell { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!("{}", self.0))?; @@ -18,3 +32,35 @@ impl Display for TapeCell2D { Ok(()) } } + +pub trait TapeOrigin { + fn origin_cell() -> Self; +} + +impl TapeOrigin for TapeCell { + fn origin_cell() -> TapeCell { + TapeCell(0) + } +} +impl TapeOrigin for TapeCell2D { + fn origin_cell() -> TapeCell2D { + TapeCell2D(0, 0) + } +} + +pub trait TapeCellLocation +where + Self: Sized + Display, +{ + /// parse any memory location specifiers + /// let g @(4,2) = 68; + /// or + /// let p @3 = 68; + fn parse_location_specifier( + tokens: &[Token], + ) -> Result<(LocationSpecifier, usize), String>; + + /// safely cast a 2D or 1D location specifier into a 1D non-negative cell offset, + /// for use with struct fields + fn to_positive_cell_offset(&self) -> Result; +} diff --git 
a/compiler/src/constants_optimiser.rs b/compiler/src/constants_optimiser.rs index e695620..520ead7 100644 --- a/compiler/src/constants_optimiser.rs +++ b/compiler/src/constants_optimiser.rs @@ -1,6 +1,6 @@ // TODO: make unit tests for this use crate::{ - backend::{BFBuilder, Opcode2D}, + backend::{BFBuilder2D, Opcode2D}, cells::TapeCell2D, }; @@ -16,14 +16,14 @@ pub fn calculate_optimal_addition( start_cell: TapeCell2D, target_cell: TapeCell2D, temp_cell: TapeCell2D, -) -> BFBuilder { +) -> BFBuilder2D { // can't abs() i8 directly because there is no +128i8, so abs(-128i8) crashes let abs_value = (value as i32).abs(); // STAGE 0: // for efficiency's sake, calculate the cost of just adding the constant to the cell let naive_solution = { - let mut ops = BFBuilder::new(); + let mut ops = BFBuilder2D::new(); ops.head_pos = start_cell; ops.move_to_cell(target_cell); ops.add_to_current_cell(value); @@ -74,7 +74,7 @@ pub fn calculate_optimal_addition( assert_eq!(best_combinations.len(), (abs_value as usize) + 1); let (a, b, c) = best_combinations.into_iter().last().unwrap(); - let mut ops = BFBuilder::new(); + let mut ops = BFBuilder2D::new(); ops.head_pos = start_cell; ops.move_to_cell(temp_cell); diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index c445102..ba008df 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -1,26 +1,31 @@ // compile syntax tree into low-level instructions -use std::{collections::HashMap, iter::zip}; +use std::{collections::HashMap, fmt::Display, iter::zip}; use crate::{ backend::Opcode2D, - cells::TapeCell2D, + cells::{TapeCell, TapeCell2D, TapeCellVariant}, macros::macros::{r_assert, r_panic}, misc::MastermindContext, parser::{ - Clause, Expression, ExtendedOpcode, LocationSpecifier, Reference, VariableDefinition, - VariableTarget, VariableTargetReferenceChain, VariableTypeReference, + Clause, Expression, ExtendedOpcode, LocationSpecifier, Reference, StructFieldDefinition, + VariableDefinition, 
VariableTarget, VariableTargetReferenceChain, VariableTypeReference, }, }; -// memory stuff is all WIP and some comments may be incorrect +// TODO: remove the need for this Into: +impl Into for TapeCell { + fn into(self) -> TapeCell2D { + TapeCell2D(self.0, 0) + } +} impl MastermindContext { - pub fn create_ir_scope<'a>( + pub fn create_ir_scope<'a, TC: 'static + TapeCellVariant + Into>( &self, - clauses: &[Clause], - outer_scope: Option<&'a ScopeBuilder>, - ) -> Result, String> { + clauses: &[Clause], + outer_scope: Option<&'a ScopeBuilder>, + ) -> Result, String> { let mut scope = if let Some(outer) = outer_scope { outer.open_inner() } else { @@ -29,18 +34,19 @@ impl MastermindContext { // TODO: fix unnecessary clones, and reimplement this with iterators somehow // hoist structs, then functions to top - let mut filtered_clauses_1: Vec> = vec![]; + let mut filtered_clauses_1: Vec> = vec![]; // first stage: structs (these need to be defined before functions, so they can be used as arguments) for clause in clauses { match clause { Clause::DefineStruct { name, fields } => { + // convert fields with 2D or 1D location specifiers to valid struct location specifiers scope.register_struct_definition(name, fields.clone())?; } _ => filtered_clauses_1.push(clause.clone()), } } // second stage: functions - let mut filtered_clauses_2: Vec> = vec![]; + let mut filtered_clauses_2: Vec> = vec![]; for clause in filtered_clauses_1 { match clause { Clause::DefineFunction { @@ -77,7 +83,7 @@ impl MastermindContext { | Expression::VariableReference(_), ) => { let cell = scope.get_cell(&VariableTarget::from_definition(&var))?; - _add_expr_to_cell(&mut scope, &value, cell)?; + scope._add_expr_to_cell(&value, cell)?; } // multi-cell arrays and (array literals or strings) @@ -90,7 +96,7 @@ impl MastermindContext { expressions.len() ); for (cell, expr) in zip(cells, expressions) { - _add_expr_to_cell(&mut scope, expr, cell)?; + scope._add_expr_to_cell(expr, cell)?; } } (ValueType::Array(_, 
_), Expression::StringLiteral(s)) => { @@ -152,11 +158,11 @@ impl MastermindContext { (false, false) => { let cell = scope.get_cell(&var)?; scope.push_instruction(Instruction::ClearCell(cell.clone())); - _add_expr_to_cell(&mut scope, &value, cell)?; + scope._add_expr_to_cell(&value, cell)?; } (false, true) => { let cell = scope.get_cell(&var)?; - _add_self_referencing_expr_to_cell(&mut scope, value, cell, true)?; + scope._add_self_referencing_expr_to_cell(value, cell, true)?; } (true, _) => { r_panic!("Unsupported operation, assigning to spread variable: {var}"); @@ -172,11 +178,11 @@ impl MastermindContext { } => match (var.is_spread, self_referencing) { (false, false) => { let cell = scope.get_cell(&var)?; - _add_expr_to_cell(&mut scope, &value, cell)?; + scope._add_expr_to_cell(&value, cell)?; } (false, true) => { let cell = scope.get_cell(&var)?; - _add_self_referencing_expr_to_cell(&mut scope, value, cell, false)?; + scope._add_self_referencing_expr_to_cell(value, cell, false)?; } (true, _) => { r_panic!("Unsupported operation, add-assigning to spread variable: {var}"); @@ -258,7 +264,7 @@ impl MastermindContext { index: None, }; - _add_expr_to_cell(&mut scope, &value, cell)?; + scope._add_expr_to_cell(&value, cell)?; scope.push_instruction(Instruction::OutputCell(cell)); scope.push_instruction(Instruction::ClearCell(cell)); @@ -277,7 +283,7 @@ impl MastermindContext { }; for value in expressions { - _add_expr_to_cell(&mut scope, &value, cell)?; + scope._add_expr_to_cell(&value, cell)?; scope.push_instruction(Instruction::OutputCell(cell)); scope.push_instruction(Instruction::ClearCell(cell)); } @@ -339,7 +345,7 @@ impl MastermindContext { memory_id: id, index: None, }; - _add_expr_to_cell(&mut scope, &source, new_cell)?; + scope._add_expr_to_cell(&source, new_cell)?; (new_cell, true) } (false, Expression::VariableReference(var)) => { @@ -356,7 +362,7 @@ impl MastermindContext { index: None, }; - _copy_cell(&mut scope, cell, new_cell, 1); + 
scope._copy_cell(cell, new_cell, 1); (new_cell, true) } @@ -435,7 +441,7 @@ impl MastermindContext { }; // copy the condition expression to the temporary condition cell - _add_expr_to_cell(&mut new_scope, &condition, condition_cell)?; + new_scope._add_expr_to_cell(&condition, condition_cell)?; new_scope.push_instruction(Instruction::OpenLoop(condition_cell)); // TODO: think about optimisations for clearing this variable, as the builder won't shorten it for safety as it doesn't know this loop is special @@ -497,9 +503,10 @@ impl MastermindContext { .create_ir_scope(&mm_clauses, Some(&functions_scope))? // compile without cleaning up top level variables, this is the brainfuck programmer's responsibility .build_ir(false); + // it is also the brainfuck programmer's responsibility to return to the start position let bf_code = - self.ir_to_bf(instructions, Some(TapeCell2D(0, 0)))?; + self.ir_to_bf(instructions, Some(TC::origin_cell()))?; expanded_bf.extend(bf_code); } ExtendedOpcode::Add => expanded_bf.push(Opcode2D::Add), @@ -605,132 +612,10 @@ impl MastermindContext { } } -// not sure if this should be in the scope impl? 
-// helper function for a common use-case -// flatten an expression and add it to a specific cell (using copies and adds, etc) -fn _add_expr_to_cell( - scope: &mut ScopeBuilder, - expr: &Expression, - cell: CellReference, -) -> Result<(), String> { - let (imm, adds, subs) = expr.flatten()?; - - scope.push_instruction(Instruction::AddToCell(cell.clone(), imm)); - - let mut adds_set = HashMap::new(); - for var in adds { - let n = adds_set.remove(&var).unwrap_or(0); - adds_set.insert(var, n + 1); - } - for var in subs { - let n = adds_set.remove(&var).unwrap_or(0); - adds_set.insert(var, n - 1); - } - - for (source, constant) in adds_set { - let source_cell = scope.get_cell(&source)?; - _copy_cell(scope, source_cell, cell.clone(), constant); - } - - Ok(()) -} - -//This function allows you to add a self referencing expression to the cell -//Separate this to ensure that normal expression don't require the overhead of copying -fn _add_self_referencing_expr_to_cell( - scope: &mut ScopeBuilder, - expr: Expression, - cell: CellReference, - pre_clear: bool, -) -> Result<(), String> { - //Create a new temp cell to store the current cell value - let temp_mem_id = scope.push_memory_id(); - scope.push_instruction(Instruction::Allocate( - Memory::Cell { id: temp_mem_id }, - None, - )); - let temp_cell = CellReference { - memory_id: temp_mem_id, - index: None, - }; - // TODO: make this more efficent by not requiring a clear cell after, - // i.e. 
simple move instead of copy by default for set operations (instead of +=) - _copy_cell(scope, cell, temp_cell, 1); - // Then if we are doing a += don't pre-clear otherwise Clear the current cell and run the same actions as _add_expr_to_cell - if pre_clear { - scope.push_instruction(Instruction::ClearCell(cell.clone())); - } - - let (imm, adds, subs) = expr.flatten()?; - - scope.push_instruction(Instruction::AddToCell(cell.clone(), imm)); - - let mut adds_set = HashMap::new(); - for var in adds { - let n = adds_set.remove(&var).unwrap_or(0); - adds_set.insert(var, n + 1); - } - for var in subs { - let n = adds_set.remove(&var).unwrap_or(0); - adds_set.insert(var, n - 1); - } - - for (source, constant) in adds_set { - let source_cell = scope.get_cell(&source)?; - //If we have an instance of the original cell being added simply use our temp cell value - // (crucial special sauce) - if source_cell.memory_id == cell.memory_id && source_cell.index == cell.index { - _copy_cell(scope, temp_cell, cell.clone(), constant); - } else { - _copy_cell(scope, source_cell, cell.clone(), constant); - } - } - //Cleanup - scope.push_instruction(Instruction::ClearCell(temp_cell)); - scope.push_instruction(Instruction::Free(temp_mem_id)); - - Ok(()) -} - -/// Helper function to copy a cell from one to another leaving the original unaffected -// TODO: make one for draining a cell -fn _copy_cell( - scope: &mut ScopeBuilder, - source_cell: CellReference, - target_cell: CellReference, - constant: i32, -) { - if constant == 0 { - return; - } - // allocate a temporary cell - let temp_mem_id = scope.push_memory_id(); - scope.push_instruction(Instruction::Allocate( - Memory::Cell { id: temp_mem_id }, - None, - )); - let temp_cell = CellReference { - memory_id: temp_mem_id, - index: None, - }; - // copy source to target and temp - scope.push_instruction(Instruction::OpenLoop(source_cell)); - scope.push_instruction(Instruction::AddToCell(target_cell, constant as u8)); - 
scope.push_instruction(Instruction::AddToCell(temp_cell, 1)); - scope.push_instruction(Instruction::AddToCell(source_cell, -1i8 as u8)); - scope.push_instruction(Instruction::CloseLoop(source_cell)); - // copy back from temp - scope.push_instruction(Instruction::OpenLoop(temp_cell)); - scope.push_instruction(Instruction::AddToCell(source_cell, 1)); - scope.push_instruction(Instruction::AddToCell(temp_cell, -1i8 as u8)); - scope.push_instruction(Instruction::CloseLoop(temp_cell)); - scope.push_instruction(Instruction::Free(temp_mem_id)); -} - // this is subject to change #[derive(Debug, Clone)] -pub enum Instruction { - Allocate(Memory, Option), +pub enum Instruction { + Allocate(Memory, Option), Free(MemoryId), // the number indicates which cell in the allocation stack should be freed (cell 0, is the top of the stack, 1 is the second element, etc) OpenLoop(CellReference), // same with other numbers here, they indicate the cell in the allocation stack to use in the instruction CloseLoop(CellReference), // pass in the cell id, this originally wasn't there but may be useful later on @@ -739,14 +624,14 @@ pub enum Instruction { ClearCell(CellReference), // not sure if this should be here, seems common enough that it should be AssertCellValue(CellReference, Option), // allows the user to hand-tune optimisations further OutputCell(CellReference), - InsertBrainfuckAtCell(Vec, CellLocation), + InsertBrainfuckAtCell(Vec, CellLocation), } #[derive(Debug, Clone)] /// Either a fixed constant cell or a reference to some existing memory -pub enum CellLocation { +pub enum CellLocation { Unspecified, - FixedCell(TapeCell), + FixedCell(TC), MemoryCell(CellReference), } @@ -832,9 +717,9 @@ pub struct ScopeBuilder<'a, TapeCell> { } #[derive(Clone, Debug)] // probably shouldn't be cloning here but whatever -struct Function { +struct Function { arguments: Vec<(String, ValueType)>, - block: Vec>, + block: Vec>, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -968,8 +853,11 @@ impl ValueType 
{ } } -impl ScopeBuilder<'_, TapeCell2D> { - pub fn new() -> ScopeBuilder<'static, TapeCell2D> { +impl ScopeBuilder<'_, TC> +where + TC: Display + Clone, +{ + pub fn new() -> ScopeBuilder<'static, TC> { ScopeBuilder { outer_scope: None, types_only: false, @@ -983,7 +871,7 @@ impl ScopeBuilder<'_, TapeCell2D> { // I don't love this system of deciding what to clean up at the end in this specific function, but I'm not sure what the best way to achieve this would be // this used to be called "get_instructions" but I think this more implies things are being modified - pub fn build_ir(mut self, clean_up_variables: bool) -> Vec> { + pub fn build_ir(mut self, clean_up_variables: bool) -> Vec> { if !clean_up_variables { return self.instructions; } @@ -1026,12 +914,12 @@ impl ScopeBuilder<'_, TapeCell2D> { self.instructions } - fn push_instruction(&mut self, instruction: Instruction) { + fn push_instruction(&mut self, instruction: Instruction) { self.instructions.push(instruction); } /// Open a scope within the current one, any time there is a {} in Mastermind, this is called - fn open_inner(&self) -> ScopeBuilder { + fn open_inner(&self) -> ScopeBuilder { ScopeBuilder { outer_scope: Some(self), types_only: false, @@ -1045,7 +933,7 @@ impl ScopeBuilder<'_, TapeCell2D> { // syntactic context instead of normal context // used for embedded mm so that the inner mm can use outer functions - fn open_inner_templates_only(&self) -> ScopeBuilder { + fn open_inner_templates_only(&self) -> ScopeBuilder { ScopeBuilder { outer_scope: Some(self), types_only: true, @@ -1058,10 +946,7 @@ impl ScopeBuilder<'_, TapeCell2D> { } /// Get the correct variable type and allocate the right amount of cells for it - fn allocate_variable( - &mut self, - var: VariableDefinition, - ) -> Result<&ValueType, String> { + fn allocate_variable(&mut self, var: VariableDefinition) -> Result<&ValueType, String> { r_assert!( !self.variable_memory.contains_key(&var.name), "Cannot allocate variable {var} twice in 
the same scope" @@ -1130,7 +1015,7 @@ impl ScopeBuilder<'_, TapeCell2D> { &self, calling_name: &str, calling_arg_types: &Vec<&ValueType>, - ) -> Result, String> { + ) -> Result, String> { // this function is unaffected by the self.fn_only flag Ok( if let Some(func) = self.functions.iter().find(|(name, args, _)| { @@ -1162,31 +1047,17 @@ impl ScopeBuilder<'_, TapeCell2D> { fn register_struct_definition( &mut self, struct_name: &str, - fields: Vec>, + fields: Vec, ) -> Result<(), String> { let mut absolute_fields = vec![]; - for var_def in fields { - let absolute_type = self.create_absolute_type(&var_def.var_type)?; - let non_neg_location_specifier = match &var_def.location_specifier { - LocationSpecifier::None => None, - LocationSpecifier::Cell(l) => { - // assert the y coordinate is 0 - r_assert!( - l.1 == 0, - "Struct field location specifiers do not support 2D grid cells: {var_def}" - ); - r_assert!( - l.0 >= 0, - "Struct field location specifiers must be non-negative: {var_def}" - ); - Some(l.0 as usize) - } - LocationSpecifier::Variable(_) => { - r_panic!( "Location specifiers in struct definitions must be relative, not variables: {var_def}") - } - }; - absolute_fields.push((var_def.name, absolute_type, non_neg_location_specifier)); + for field_def in fields { + let absolute_type = self.create_absolute_type(&field_def.field_type)?; + absolute_fields.push(( + field_def.name, + absolute_type, + field_def.location_offset_specifier, + )); } let None = self @@ -1203,8 +1074,8 @@ impl ScopeBuilder<'_, TapeCell2D> { fn register_function_definition( &mut self, new_function_name: &str, - new_arguments: Vec>, - new_block: Vec>, + new_arguments: Vec>, + new_block: Vec>, ) -> Result<(), String> { let absolute_arguments = new_arguments .into_iter() @@ -1614,4 +1485,122 @@ mapping: {mapped_var_name} -> {target}" .insert(mapped_var_name, (var_type.clone(), mapped_memory)); Ok(()) } + + /// helper function for a common use-case: + /// flatten an expression and add it to a 
specific cell (using copies and adds, etc) + fn _add_expr_to_cell(&mut self, expr: &Expression, cell: CellReference) -> Result<(), String> { + let (imm, adds, subs) = expr.flatten()?; + + self.push_instruction(Instruction::AddToCell(cell.clone(), imm)); + + let mut adds_set = HashMap::new(); + for var in adds { + let n = adds_set.remove(&var).unwrap_or(0); + adds_set.insert(var, n + 1); + } + for var in subs { + let n = adds_set.remove(&var).unwrap_or(0); + adds_set.insert(var, n - 1); + } + + for (source, constant) in adds_set { + let source_cell = self.get_cell(&source)?; + self._copy_cell(source_cell, cell.clone(), constant); + } + + Ok(()) + } + + /// helper function to add a self-referencing expression to a cell + /// this is separated because it requires another copy ontop of normal expressions + // TODO: refactor/fix underlying logic for this + fn _add_self_referencing_expr_to_cell( + &mut self, + expr: Expression, + cell: CellReference, + pre_clear: bool, + ) -> Result<(), String> { + //Create a new temp cell to store the current cell value + let temp_mem_id = self.push_memory_id(); + self.push_instruction(Instruction::Allocate( + Memory::Cell { id: temp_mem_id }, + None, + )); + let temp_cell = CellReference { + memory_id: temp_mem_id, + index: None, + }; + // TODO: make this more efficent by not requiring a clear cell after, + // i.e. 
simple move instead of copy by default for set operations (instead of +=) + self._copy_cell(cell, temp_cell, 1); + // Then if we are doing a += don't pre-clear otherwise Clear the current cell and run the same actions as _add_expr_to_cell + if pre_clear { + self.push_instruction(Instruction::ClearCell(cell.clone())); + } + + let (imm, adds, subs) = expr.flatten()?; + + self.push_instruction(Instruction::AddToCell(cell.clone(), imm)); + + let mut adds_set = HashMap::new(); + for var in adds { + let n = adds_set.remove(&var).unwrap_or(0); + adds_set.insert(var, n + 1); + } + for var in subs { + let n = adds_set.remove(&var).unwrap_or(0); + adds_set.insert(var, n - 1); + } + + for (source, constant) in adds_set { + let source_cell = self.get_cell(&source)?; + //If we have an instance of the original cell being added simply use our temp cell value + // (crucial special sauce) + if source_cell.memory_id == cell.memory_id && source_cell.index == cell.index { + self._copy_cell(temp_cell, cell.clone(), constant); + } else { + self._copy_cell(source_cell, cell.clone(), constant); + } + } + //Cleanup + self.push_instruction(Instruction::ClearCell(temp_cell)); + self.push_instruction(Instruction::Free(temp_mem_id)); + + Ok(()) + } + + /// Helper function to copy a cell from one to another, leaving the original unaffected + // TODO: make one for draining a cell + fn _copy_cell( + &mut self, + source_cell: CellReference, + target_cell: CellReference, + constant: i32, + ) { + if constant == 0 { + return; + } + // allocate a temporary cell + let temp_mem_id = self.push_memory_id(); + self.push_instruction(Instruction::Allocate( + Memory::Cell { id: temp_mem_id }, + None, + )); + let temp_cell = CellReference { + memory_id: temp_mem_id, + index: None, + }; + // copy source to target and temp + self.push_instruction(Instruction::OpenLoop(source_cell)); + self.push_instruction(Instruction::AddToCell(target_cell, constant as u8)); + 
self.push_instruction(Instruction::AddToCell(temp_cell, 1)); + self.push_instruction(Instruction::AddToCell(source_cell, -1i8 as u8)); + self.push_instruction(Instruction::CloseLoop(source_cell)); + // copy back from temp + self.push_instruction(Instruction::OpenLoop(temp_cell)); + self.push_instruction(Instruction::AddToCell(source_cell, 1)); + self.push_instruction(Instruction::AddToCell(temp_cell, -1i8 as u8)); + self.push_instruction(Instruction::CloseLoop(temp_cell)); + self.push_instruction(Instruction::Free(temp_mem_id)); + } } diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 3d4a577..2942ed2 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,29 +1,33 @@ #![allow(dead_code)] +// dead code is allowed because we have two different compile targets (wasm and command-line) -mod macros; - -// allowing dead code because we have two different compile targets (wasm and command-line) +// crate dependencies: mod backend; mod brainfuck; mod brainfuck_optimiser; mod cells; mod constants_optimiser; mod frontend; +mod macros; mod misc; mod parser; mod preprocessor; -mod tokeniser; - mod tests; +mod tokeniser; +use crate::{ + backend::BrainfuckOpcodes, + brainfuck::{BrainfuckConfig, BrainfuckContext}, + cells::{TapeCell, TapeCell2D}, + misc::MastermindContext, + parser::parse, + preprocessor::preprocess_from_memory, + tokeniser::tokenise, +}; -use backend::BrainfuckOpcodes; -use brainfuck::{BrainfuckConfig, BrainfuckContext}; -use misc::MastermindContext; -use parser::parse; -use preprocessor::preprocess_from_memory; -use tokeniser::tokenise; - +// stdlib dependencies: use std::collections::HashMap; + +// external dependencies: use wasm_bindgen::{prelude::wasm_bindgen, JsValue}; pub fn set_panic_hook() { @@ -49,9 +53,15 @@ pub fn wasm_compile( let preprocessed_file = preprocess_from_memory(&file_contents, entry_file_name)?; let tokens = tokenise(&preprocessed_file)?; - let parsed_syntax = parse(&tokens)?; - let instructions = 
ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); - let bf_code = ctx.ir_to_bf(instructions, None)?; + let bf_code = if ctx.config.enable_2d_grid { + let parsed_syntax = parse::(&tokens)?; + let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); + ctx.ir_to_bf(instructions, None)? + } else { + let parsed_syntax = parse::(&tokens)?; + let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); + ctx.ir_to_bf(instructions, None)? + }; Ok(match ctx.config.optimise_generated_code { true => ctx.optimise_bf_code(bf_code).to_string(), diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 0c276ba..1caac03 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -1,35 +1,35 @@ #![allow(dead_code)] +// dead code is allowed because we have two different compile targets (wasm and command-line) -mod macros; - +// crate dependencies: mod backend; mod brainfuck; mod brainfuck_optimiser; mod cells; mod constants_optimiser; mod frontend; +mod macros; +mod misc; mod parser; mod preprocessor; -mod tokeniser; - -mod misc; mod tests; +mod tokeniser; +use crate::{ + backend::BrainfuckOpcodes, + brainfuck::{BrainfuckConfig, BrainfuckContext}, + cells::{TapeCell, TapeCell2D}, + misc::{MastermindConfig, MastermindContext}, + parser::parse, + preprocessor::preprocess, + tokeniser::tokenise, +}; -use backend::BrainfuckOpcodes; -use misc::MastermindConfig; -use parser::parse; -use preprocessor::preprocess; -use tokeniser::tokenise; - +// stdlib dependencies: use std::io::{stdin, stdout, Cursor}; +// external dependencies: use clap::Parser; -use crate::{ - brainfuck::{BrainfuckConfig, BrainfuckContext}, - misc::MastermindContext, -}; - #[derive(Parser, Default, Debug)] #[command(author = "Heathcorp", version = "0.1", about = "Mastermind: the Brainfuck interpreter and compilation tool", long_about = None)] struct Arguments { @@ -97,17 +97,16 @@ fn main() -> Result<(), String> { let bf_program = match args.compile { true => { 
// compile the provided file - let tokens = tokenise(&program)?; - // parse tokens into syntax tree - let clauses = parse(&tokens)?; - // compile syntax tree into brainfuck - - // 2 stage compilation step, first stage compiles syntax tree into low-level instructions - // second stage translates the low-level instructions into brainfuck - - let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); - let bf_code = ctx.ir_to_bf(instructions, None)?; + let bf_code = if ctx.config.enable_2d_grid { + let parsed_syntax = parse::(&tokens)?; + let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); + ctx.ir_to_bf(instructions, None)? + } else { + let parsed_syntax = parse::(&tokens)?; + let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); + ctx.ir_to_bf(instructions, None)? + }; match ctx.config.optimise_generated_code { true => ctx.optimise_bf_code(bf_code).to_string(), diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index c5f942c..d1ee930 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -1,5 +1,5 @@ use crate::{ - cells::{TapeCell, TapeCell2D}, + cells::{TapeCell, TapeCell2D, TapeCellLocation}, macros::macros::{r_assert, r_panic}, tokeniser::Token, }; @@ -155,9 +155,9 @@ fn parse_struct_clause(clause: &[Token]) -> Result(&braced_tokens[j..], true)?; j += len; - fields.push(field); + fields.push(field.try_into()?); r_assert!( j <= braced_tokens.len(), "Struct definition field exceeded braces. This should never occur. 
{clause:#?}" @@ -478,19 +478,6 @@ fn parse_assert_clause(clause: &[Token]) -> Result, String> { } } -pub trait TapeCellLocation -where - Self: Sized, -{ - // parse any memory location specifiers - // let g @(4,2) = 68; - // or - // let p @3 = 68; - fn parse_location_specifier( - tokens: &[Token], - ) -> Result<(LocationSpecifier, usize), String>; -} - impl TapeCellLocation for TapeCell { fn parse_location_specifier( tokens: &[Token], @@ -504,7 +491,7 @@ impl TapeCellLocation for TapeCell { let mut i = 1; match &tokens[i] { - Token::Minus => { + Token::Minus | Token::Digits(_) => { let mut positive = true; if let Token::Minus = &tokens[i] { i += 1; @@ -530,9 +517,17 @@ impl TapeCellLocation for TapeCell { Ok((LocationSpecifier::Variable(var), i)) } - _ => r_panic!("Invalid location specifier: {tokens:#?}"), + _ => r_panic!( + "Invalid location specifier: {:?}", + &tokens[0..(tokens.len().min(5))] + ), } } + + fn to_positive_cell_offset(&self) -> Result { + r_assert!(self.0 >= 0, "Expected non-negative cell offset."); + Ok(self.0 as usize) + } } impl TapeCellLocation for TapeCell2D { @@ -610,6 +605,14 @@ impl TapeCellLocation for TapeCell2D { _ => r_panic!("Expected constant or variable in location specifier: {tokens:#?}"), } } + + fn to_positive_cell_offset(&self) -> Result { + r_assert!( + self.1 == 0 && self.0 >= 0, + "Expected non-negative 1st dimensional cell offset (i.e. (x,y) where y=0)." 
+ ); + Ok(self.0 as usize) + } } fn parse_brainfuck_clause(clause: &[Token]) -> Result, String> { @@ -1285,7 +1288,7 @@ pub enum Clause { }, DefineStruct { name: String, - fields: Vec>, + fields: Vec, }, AddToVariable { var: VariableTarget, @@ -1368,9 +1371,9 @@ pub enum VariableTypeReference { } #[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub enum LocationSpecifier { +pub enum LocationSpecifier { None, - Cell(TapeCell), + Cell(TC), Variable(VariableTarget), } impl LocationSpecifier { @@ -1380,13 +1383,63 @@ impl LocationSpecifier { } #[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct VariableDefinition { +pub struct VariableDefinition { pub name: String, pub var_type: VariableTypeReference, - pub location_specifier: LocationSpecifier, + pub location_specifier: LocationSpecifier, // Infinite {name: String, pattern: ???}, } +#[derive(Debug, Clone, PartialEq)] +pub struct StructFieldDefinition { + pub name: String, + pub field_type: VariableTypeReference, + pub location_offset_specifier: Option, +} +// let non_neg_location_specifier = match &var_def.location_specifier { +// LocationSpecifier::None => None, +// LocationSpecifier::Cell(l) => { +// // assert the y coordinate is 0 +// // r_assert!( +// // l.1 == 0, +// // "Struct field location specifiers do not support 2D grid cells: {var_def}" +// // ); +// r_assert!( +// l.0 >= 0, +// "Struct field location specifiers must be non-negative: {var_def}" +// ); +// Some(l.0 as usize) +// } +// LocationSpecifier::Variable(_) => { +// r_panic!( "Location specifiers in struct definitions must be relative, not variables: {var_def}") +// } +// }; +impl TryInto for VariableDefinition +where + TC: TapeCellLocation, +{ + type Error = String; + + fn try_into(self) -> Result { + let location_offset_specifier = match &self.location_specifier { + LocationSpecifier::None => None, + LocationSpecifier::Cell(cell) => Some(match cell.to_positive_cell_offset() { + Ok(offset) => offset, + Err(err) => r_panic!("Cannot create struct 
field \"{self}\". {err}"), + }), + LocationSpecifier::Variable(_) => r_panic!( + "Location specifiers in struct definitions \ +must be relative, not variable." + ), + }; + Ok(StructFieldDefinition { + name: self.name, + field_type: self.var_type, + location_offset_specifier, + }) + } +} + #[derive(Debug, Clone, Hash, PartialEq, Eq)] pub enum Reference { NamedField(String), diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index 524c32f..c58a817 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -6,6 +6,7 @@ pub mod black_box_tests { use crate::{ backend::{BrainfuckOpcodes, Opcode2D}, brainfuck::{bvm_tests::run_code, BrainfuckConfig}, + cells::{TapeCell, TapeCell2D, TapeCellVariant}, misc::{MastermindConfig, MastermindContext}, parser::parse, tokeniser::{tokenise, Token}, @@ -88,10 +89,13 @@ pub mod black_box_tests { const TESTING_BVM_MAX_STEPS: usize = 100_000_000; - fn compile_and_run(program: String, input: String) -> Result { + fn compile_and_run>( + program: String, + input: String, + ) -> Result { let ctx = MastermindContext { config: OPT_NONE }; let tokens: Vec = tokenise(&program)?; - let clauses = parse(&tokens)?; + let clauses = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_program = ctx.ir_to_bf(instructions, None)?; let bfs = bf_program.to_string(); @@ -105,7 +109,7 @@ pub mod black_box_tests { )) } - fn compile_program( + fn compile_program>( program: String, config: Option, ) -> Result, String> { @@ -113,7 +117,7 @@ pub mod black_box_tests { config: config.unwrap_or(OPT_NONE), }; let tokens: Vec = tokenise(&program)?; - let clauses = parse(&tokens)?; + let clauses = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; @@ -125,7 +129,7 @@ pub mod black_box_tests { let program = String::from(""); let input = String::from(""); let desired_output = String::from(""); - let output = 
compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -133,7 +137,7 @@ pub mod black_box_tests { // #[test] fn dummy_compile_fail_test() { let program = String::from(""); - let result = compile_program(program, None); + let result = compile_program::(program, None); assert!(result.is_err()); } @@ -141,7 +145,9 @@ pub mod black_box_tests { fn dummy_code_test() { let program = String::from(""); let desired_code = String::from(""); - let code = compile_program(program, None).expect("").to_string(); + let code = compile_program::(program, None) + .expect("") + .to_string(); println!("{code}"); assert_eq!(desired_code, code); @@ -157,7 +163,7 @@ pub mod black_box_tests { let program = String::from(""); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); assert_eq!(desired_output, output) } @@ -166,7 +172,7 @@ pub mod black_box_tests { let program = String::from(";"); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); assert_eq!(desired_output, output) } @@ -175,7 +181,7 @@ pub mod black_box_tests { let program = String::from(";;;;;;"); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); assert_eq!(desired_output, output) } @@ -184,7 +190,7 @@ pub mod black_box_tests { let program = String::from(";;{;{;};};;;"); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); assert_eq!(desired_output, output) } @@ -210,7 +216,10 @@ 
output ten; ); let input = String::from(""); let desired_output = String::from("hello\n"); - assert_eq!(desired_output, compile_and_run(program, input).expect("")); + assert_eq!( + desired_output, + compile_and_run::(program, input).expect("") + ); } #[test] @@ -227,7 +236,10 @@ output 10; ); let input = String::from(""); let desired_output = String::from("hello\n"); - assert_eq!(desired_output, compile_and_run(program, input).expect("")) + assert_eq!( + desired_output, + compile_and_run::(program, input).expect("") + ) } #[test] @@ -249,7 +261,7 @@ output 70; ); let input = String::from(""); let desired_output = String::from("hello\n\n\0F"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -270,7 +282,7 @@ output "What?"; ); let input = String::from(""); let desired_output = String::from("Hello.\nWhat?"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -285,7 +297,7 @@ output ['o', '.', '\n']; ); let input = String::from(""); let desired_output = String::from("Hello.\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -299,7 +311,7 @@ output '@' + 256 + 1 + false + true + 'e' - '@'; ); let input = String::from(""); let desired_output = String::from("g"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -324,7 +336,7 @@ if q { ); let input = String::from(""); let desired_output = String::from("Hi friend!\npath b"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); 
println!("{output}"); assert_eq!(desired_output, output) } @@ -356,7 +368,7 @@ if not_a - 'a' { ); let input = String::from(""); let desired_output = String::from("ACb"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -378,7 +390,7 @@ output A; ); let input = String::from(""); let desired_output = String::from("666666 G"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -395,7 +407,7 @@ output '0' + x; ); let input = String::from(""); let desired_output = String::from("56"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -412,7 +424,7 @@ output '0' + x; ); let input = String::from(""); let desired_output = String::from("56"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -428,7 +440,7 @@ output '0' + x; ); let input = String::from(""); let desired_output = String::from("5;"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -445,7 +457,7 @@ output '0' + x; ); let input = String::from(""); let desired_output = String::from("26"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -461,7 +473,7 @@ output '0' + x; ); let input = String::from(""); let desired_output = String::from("3"); - let output = compile_and_run(program, input).expect(""); + let output = 
compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -481,7 +493,7 @@ output *x; ); let input = String::from(""); let desired_output = String::from("82"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -501,7 +513,7 @@ output *x; ); let input = String::from(""); let desired_output = String::from("79"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -516,7 +528,7 @@ output x - 2; ); let input = String::from(""); let desired_output = String::from("~"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -533,7 +545,7 @@ output x + 'f' + 1; ); let input = String::from(""); let desired_output = String::from("f"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -550,7 +562,7 @@ output x + 'f'; ); let input = String::from(""); let desired_output = String::from("f"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -566,7 +578,7 @@ output x + 'f'; ); let input = String::from(""); let desired_output = String::from("f"); - let code = compile_program(program, Some(OPT_ALL))?; + let code = compile_program::(program, Some(OPT_ALL))?; assert_eq!( desired_output, run_code(BVM_CONFIG_1D, code.to_string(), input, None) @@ -586,7 +598,7 @@ output x + 'f'; ); let input = String::from(""); let desired_output = String::from("f"); - let code = compile_program(program, 
Some(OPT_ALL))?; + let code = compile_program::(program, Some(OPT_ALL))?; assert_eq!( desired_output, run_code(BVM_CONFIG_1D, code.to_string(), input, None) @@ -616,7 +628,10 @@ drain a { ); let input = String::from(""); let desired_output = String::from("0AB\n1ABB\n2ABBB\n3ABBBB\n4ABBBBB\n5ABBBBBB\n6ABBBBBBB\n7ABBBBBBBB\n8ABBBBBBBBB\n9ABBBBBBBBBB\n"); - assert_eq!(desired_output, compile_and_run(program, input).expect("")) + assert_eq!( + desired_output, + compile_and_run::(program, input).expect("") + ) } #[test] @@ -642,7 +657,10 @@ drain g into a {output a;} ); let input = String::from(""); let desired_output = String::from("AABAA\nBBDAB\nCCGAC\nDDKAD\neefghi"); - assert_eq!(desired_output, compile_and_run(program, input).expect("")) + assert_eq!( + desired_output, + compile_and_run::(program, input).expect("") + ) } #[test] @@ -655,7 +673,10 @@ output 'h'; ); let input = String::from(""); let desired_output = String::from("h"); - assert_eq!(desired_output, compile_and_run(program, input).expect("")) + assert_eq!( + desired_output, + compile_and_run::(program, input).expect("") + ) } #[test] @@ -695,7 +716,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("ACE\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -739,7 +760,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("ACE\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -758,7 +779,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("5\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -793,7 +814,7 @@ drain a 
{ ); let input = String::from(""); let desired_output = String::from("0ABB\n1ABB\n2ABB\n3ABBBBBBBBBB\n4ABB\n5ABB\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -830,7 +851,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("010131\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -866,7 +887,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("01231\n"); - let code = compile_program(program, Some(OPT_NONE))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE))?.to_string(); println!("{}", code); let output = run_code(BVM_CONFIG_1D, code, input, None); println!("{output}"); @@ -940,7 +961,7 @@ fn func_2(cell[4] think, cell green) { ); let input = String::from(""); let desired_output = String::from("01202726631\n@1202726631\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -960,7 +981,7 @@ fn add_one(cell cel) { ); let input = String::from(""); let desired_output = String::from("ABCD"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -987,7 +1008,7 @@ fn add_one_to_three(cell[3] t) { ); let input = String::from(""); let desired_output = String::from("111"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1014,7 +1035,7 @@ fn add_one(cell t) { ); let input = String::from(""); let desired_output = String::from("12"); - 
let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1052,7 +1073,7 @@ fn add_one(struct A t) { ); let input = String::from(""); let desired_output = String::from("12\n23"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1091,7 +1112,7 @@ fn add_one(struct A t, cell a) { ); let input = String::from(""); let desired_output = String::from("12\n33"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1135,7 +1156,7 @@ fn add_one(struct A tfoaishjdf, cell aaewofjas) { ); let input = String::from(""); let desired_output = String::from("12\n33"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1154,7 +1175,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("hello\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1180,7 +1201,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("hello\nhello: g\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1207,7 +1228,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("hello\nhello: g\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); 
assert_eq!(desired_output, output) } @@ -1224,7 +1245,7 @@ output b; ); let input = String::from("A"); let desired_output = String::from("B"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1251,7 +1272,7 @@ output b[0]; ); let input = String::from("ABC"); let desired_output = String::from("ABC\nDDD"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1280,7 +1301,7 @@ output c; ); let input = String::from(""); let desired_output = String::from("FooFpp\nZ"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1315,7 +1336,7 @@ output *v; ); let input = String::from(""); let desired_output = String::from("hhh hh hello"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1335,7 +1356,7 @@ output *v; ); let input = String::from(""); let desired_output = String::from("Freidns\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1355,7 +1376,7 @@ output f; ); let input = String::from(""); let desired_output = String::from("fFf"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1397,7 +1418,7 @@ output g[2][3]; ); let input = String::from(""); let desired_output = String::from("543112320003"); - let output = compile_and_run(program, input).expect(""); + let output = 
compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1423,7 +1444,7 @@ output '0' + a.yellow; ); let input = String::from(""); let desired_output = String::from("0064"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1450,7 +1471,7 @@ output '0' + a.yellow; ); let input = String::from(""); let desired_output = String::from("3452"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1479,7 +1500,7 @@ output a.green; ); let input = String::from("gh"); let desired_output = String::from("hg"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1512,7 +1533,7 @@ struct AA { ); let input = String::from("gh"); let desired_output = String::from("hg"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1550,7 +1571,7 @@ struct AA { ); let input = String::from("ghpalindrome"); let desired_output = String::from("nhg"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1584,7 +1605,7 @@ output '\n'; ); let input = String::from("hellow"); let desired_output = String::from("helowl\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1612,7 +1633,7 @@ output '\n'; ); let input = String::from("gy0123"); let desired_output = 
String::from("0123yg\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1650,7 +1671,7 @@ output '\n'; ); let input = String::from("gy-+t"); let desired_output = String::from("t-+yg\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1674,7 +1695,7 @@ output '\n'; ); let input = String::from("0123a"); let desired_output = String::from("a\n"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1697,7 +1718,7 @@ output '0' + as[1].green; ); let input = String::from(""); let desired_output = String::from("53"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1720,7 +1741,7 @@ struct AAA { ); let input = String::from(""); let desired_output = String::from("53"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1757,7 +1778,7 @@ output as[1].green; ); let input = String::from("tr"); let desired_output = String::from("HI\n6tr"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1805,7 +1826,7 @@ output as[1].bbb[2].green; ); let input = String::from("abcdefgh"); let desired_output = String::from("HI\ngabchdef"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, 
output) } @@ -1853,7 +1874,7 @@ output as[1].bbb[2].green; ); let input = String::from("abcdefgh"); let desired_output = String::from("HI\ngabchdef"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1888,7 +1909,7 @@ bf @2 { ); let input = String::from(""); let desired_output = String::from("jkl"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1911,7 +1932,7 @@ struct Frame f; ); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1934,7 +1955,7 @@ struct Frame f; ); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1959,7 +1980,7 @@ output g.b; ); let input = String::from(""); let desired_output = String::from("ab"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1983,7 +2004,7 @@ bf @4 { ); let input = String::from(""); let desired_output = String::from("55"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1998,7 +2019,7 @@ output '0' + sizeof(cell); ); let input = String::from(""); let desired_output = String::from("1"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, 
input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2013,7 +2034,7 @@ output '0' + sizeof(cell[5]); ); let input = String::from(""); let desired_output = String::from("5"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2032,7 +2053,7 @@ output '0' + sizeof(b[2]); ); let input = String::from(""); let desired_output = String::from("141"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2051,7 +2072,7 @@ output '0' + s; ); let input = String::from(""); let desired_output = String::from("1"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2070,7 +2091,7 @@ output '0' + s; ); let input = String::from(""); let desired_output = String::from("3"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2089,7 +2110,7 @@ output '0' + s; ); let input = String::from(""); let desired_output = String::from("6"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2109,7 +2130,7 @@ output '0' + sizeof(g); ); let input = String::from(""); let desired_output = String::from("2"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2133,7 +2154,7 @@ output '0' + sizeof(g[0].red); ); let input = String::from(""); let desired_output = String::from("115"); - let output = 
compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2154,7 +2175,7 @@ output '0' + sizeof(g[2].blue) ); let input = String::from(""); let desired_output = String::from("391"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2185,7 +2206,7 @@ output '0' + sizeof(g[2].blue) ); let input = String::from(""); let desired_output = String::from("23612"); - let output = compile_and_run(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2205,7 +2226,7 @@ cell foo @3 = 2; output foo; "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2225,7 +2246,7 @@ cell foo @0 = 2; cell b = 10; "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert!(code.starts_with(">>>>>++++<<<<<++>++++++++++")); @@ -2241,7 +2262,7 @@ cell foo @0 = 2; cell b = 3; "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert!(code.starts_with(">+<++>>+++")); @@ -2249,42 +2270,7 @@ cell b = 3; } #[test] - fn memory_specifiers_4() -> Result<(), String> { - let program = String::from( - r#" -cell a @1,2 = 1; -cell foo @0 = 2; -cell b = 3; -"#, - ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - assert!(code.starts_with(">^^++++")); - Ok(()) - } - - #[test] - fn memory_specifiers_5() -> Result<(), String> { - let program = String::from( - r#" -cell[4][3] g @1,2; -g[0][0] = 1; -g[1][1] = 2; -g[2][2] = 3; -cell foo @0 = 
2; -cell b = 3; -"#, - ); - let code = compile_program(program, None)?.to_string(); - println!("{code}"); - - assert!(code.starts_with(">^^[-]+>>>>>[-]++>>>>>[-]+++<<<<<<<<<<+++")); - Ok(()) - } - - #[test] - fn memory_specifiers_6() { + fn memory_specifiers_4() { let program = String::from( r#" cell a @1 = 1; @@ -2292,7 +2278,7 @@ cell foo @1 = 2; cell b = 3; "#, ); - let code = compile_program(program, None); + let code = compile_program::(program, None); assert!(code.is_err()); assert!(code .unwrap_err() @@ -2300,56 +2286,6 @@ cell b = 3; .contains("Location specifier @1,0 conflicts with another allocation")); } - #[test] - fn memory_specifiers_7() { - let program = String::from( - r#" -cell a @1,3 = 1; -cell foo @1,3 = 2; -cell b = 3; -"#, - ); - let code = compile_program(program, None); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @1,3 conflicts with another allocation")); - } - - #[test] - fn memory_specifiers_8() { - let program = String::from( - r#" -cell a @2 = 1; -cell foo @2,0 = 2; -cell b = 3; -"#, - ); - let code = compile_program(program, None); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @2,0 conflicts with another allocation")); - } - - #[test] - fn memory_specifiers_9() { - let program = String::from( - r#" -cell a @2,4 = 1; -cell[4] b @0,4; -"#, - ); - let code = compile_program(program, None); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @0,4 conflicts with another allocation")); - } - #[test] fn variable_location_specifiers_1() -> Result<(), String> { let program = String::from( @@ -2358,7 +2294,7 @@ cell a = 'h'; bf @a {.} "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from("wxy"); @@ -2378,7 +2314,7 @@ cell[4] b; bf @a {.} "#, ); - let code = 
compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2400,7 +2336,7 @@ bf @t.a { } "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from("wxy"); @@ -2423,7 +2359,7 @@ bf @t { } "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from("wxy"); @@ -2444,7 +2380,7 @@ output 10; output *f; "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2462,7 +2398,7 @@ cell[4] f @8 = "xyz "; bf @f {[.>]} "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2484,7 +2420,7 @@ cell a = '5'; func(a); "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2506,7 +2442,7 @@ cell[3] a = "456"; func(a[1]); "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2532,7 +2468,7 @@ a.r[2] = '6'; func(a.r[1]); "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2558,7 +2494,7 @@ a.r[2] = '6'; func(a); "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2584,7 +2520,7 @@ a.jj.j[1] = '4'; func(a.jj.j); "#, ); - let code = 
compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2605,7 +2541,7 @@ a = 0; output a; "#, ); - let code = compile_program(program, Some(OPT_ALL))?.to_string(); + let code = compile_program::(program, Some(OPT_ALL))?.to_string(); println!("{code}"); assert!(code.starts_with("+++++.--.")); @@ -2623,7 +2559,7 @@ a = 0; output a; "#, ); - let code = compile_program(program, Some(OPT_ALL))?.to_string(); + let code = compile_program::(program, Some(OPT_ALL))?.to_string(); println!("{code}"); assert!(code.starts_with("++.[-].")); @@ -2640,7 +2576,7 @@ bf { } "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert_eq!( @@ -2665,7 +2601,7 @@ bf @3 { } "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert!(code.starts_with( @@ -2700,7 +2636,7 @@ bf @0 clobbers *str { assert *str equals 0; "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert!(code.starts_with(",>,>,<<[+>]<<<[.[-]>]<<<")); @@ -2732,7 +2668,7 @@ bf { } "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let output = run_code(BVM_CONFIG_1D, code, String::from("line of input\n"), None); @@ -2770,7 +2706,7 @@ bf { } "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let output = run_code(BVM_CONFIG_1D, code, String::from("hello\n"), None); @@ -2793,7 +2729,7 @@ bf { } "#, ); - let result = compile_program(program, None); + let result = compile_program::(program, None); assert!(result.is_err()); Ok(()) @@ -2810,7 +2746,7 @@ bf { } "#, ); 
- let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert_eq!(code, ",>,>,<<>>>>>+[-]<<<<<"); @@ -2823,7 +2759,7 @@ bf { bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.} "#, ); - let code = compile_program(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); assert_eq!( code, @@ -2842,7 +2778,7 @@ bf { bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvstvv.+++.------.vv-.^^^^+.} "#, ); - let _result = compile_program(program, None); + let _result = compile_program::(program, None); } #[test] @@ -2867,7 +2803,7 @@ output 'h'; let input = String::from(""); let desired_output = String::from("h"); - let code = compile_program(program, Some(OPT_ALL))?; + let code = compile_program::(program, Some(OPT_ALL))?; println!("{}", code.clone().to_string()); assert_eq!( desired_output, @@ -2893,12 +2829,14 @@ output a + 3; let input = String::from(""); let desired_output = String::from("tIJ"); - let code = compile_program(program, Some(OPT_ALL))?.to_string(); + let code = compile_program::(program, Some(OPT_ALL))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_1D, code, input, None)); Ok(()) } + + // TODO: remove the need for this #[test] #[should_panic(expected = "Memory Allocation Method not implemented")] fn unimplemented_memory_allocation() { @@ -2920,8 +2858,94 @@ output a + 3; memory_allocation_method: 128, enable_2d_grid: false, }; - let _code = compile_program(program, Some(cfg)); + let _code = compile_program::(program, Some(cfg)); + } + + #[test] + fn memory_specifiers_2d_1() -> Result<(), String> { + let program = String::from( + r#" +cell a @1,2 = 1; +cell foo @0 = 2; +cell b = 3; +"#, + ); + let code = compile_program::(program, None)?.to_string(); + println!("{code}"); + + assert!(code.starts_with(">^^++++")); + Ok(()) } + + #[test] + fn memory_specifiers_2d_2() -> 
Result<(), String> { + let program = String::from( + r#" +cell[4][3] g @1,2; +g[0][0] = 1; +g[1][1] = 2; +g[2][2] = 3; +cell foo @0 = 2; +cell b = 3; +"#, + ); + let code = compile_program::(program, None)?.to_string(); + println!("{code}"); + + assert!(code.starts_with(">^^[-]+>>>>>[-]++>>>>>[-]+++<<<<<<<<<<+++")); + Ok(()) + } + + #[test] + fn memory_specifiers_2d_3() { + let program = String::from( + r#" +cell a @1,3 = 1; +cell foo @1,3 = 2; +cell b = 3; +"#, + ); + let code = compile_program::(program, None); + assert!(code.is_err()); + assert!(code + .unwrap_err() + .to_string() + .contains("Location specifier @1,3 conflicts with another allocation")); + } + + #[test] + fn memory_specifiers_2d_4() { + let program = String::from( + r#" +cell a @2 = 1; +cell foo @2,0 = 2; +cell b = 3; +"#, + ); + let code = compile_program::(program, None); + assert!(code.is_err()); + assert!(code + .unwrap_err() + .to_string() + .contains("Location specifier @2,0 conflicts with another allocation")); + } + + #[test] + fn memory_specifiers_2d_5() { + let program = String::from( + r#" +cell a @2,4 = 1; +cell[4] b @0,4; +"#, + ); + let code = compile_program::(program, None); + assert!(code.is_err()); + assert!(code + .unwrap_err() + .to_string() + .contains("Location specifier @0,4 conflicts with another allocation")); + } + #[test] fn tiles_memory_allocation_1() -> Result<(), String> { let program = String::from( @@ -2939,7 +2963,7 @@ cell j = 1; ); let desired_output = String::from("+vv+^^+>vv+^+^+"); - let code = compile_program(program, Some(OPT_NONE_TILES))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); assert_eq!(desired_output, code); Ok(()) @@ -2971,14 +2995,16 @@ output i; let input = String::from(""); let desired_output = String::from("123456789"); - let code = compile_program(program, Some(OPT_NONE_TILES))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); println!("{}", code); 
assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); Ok(()) } + // TODO: decipher this #[test] + #[ignore] fn tiles_memory_allocation_3() { let program = String::from( r#" @@ -2986,7 +3012,7 @@ cell a @2,4 = 1; cell[4] b @0,4; "#, ); - let code = compile_program(program, Some(OPT_NONE_TILES)); + let code = compile_program::(program, Some(OPT_NONE_TILES)); assert!(code.is_err()); assert!(code .unwrap_err() @@ -3012,7 +3038,7 @@ output b[3]; output a; "#, ); - let code = compile_program(program, Some(OPT_NONE_TILES))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); println!("{}", code); let input = String::from(""); let desired_output = String::from("12345"); @@ -3037,7 +3063,7 @@ cell j = 1; ); let desired_output = String::from("+>+<^+>>v+<^+<^+>>>vv+<^+<^+"); - let code = compile_program(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); assert_eq!(desired_output, code); Ok(()) @@ -3069,14 +3095,16 @@ output i; let input = String::from(""); let desired_output = String::from("123456789"); - let code = compile_program(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); Ok(()) } + // TODO: decipher this #[test] + #[ignore] fn zig_zag_memory_allocation_3() { let program = String::from( r#" @@ -3084,12 +3112,12 @@ cell a @2,4 = 1; cell[4] b @0,4; "#, ); - let code = compile_program(program, Some(OPT_NONE_ZIG_ZAG)); + let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG)); assert!(code.is_err()); assert!(code .unwrap_err() .to_string() - .contains("Location specifier @0,4 conflicts with another allocation")); + .contains("Location specifier @(0, 4) conflicts with another allocation")); } #[test] @@ -3110,7 +3138,7 @@ output b[3]; output a; "#, ); - let code = 
compile_program(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); println!("{}", code); let input = String::from(""); let desired_output = String::from("12345"); @@ -3135,7 +3163,9 @@ cell j = 1; ); let desired_output = String::from("^+>+v+<+<+^+^+>+>+"); - let code = compile_program(program, Some(OPT_NONE_SPIRAL))?.to_string(); + // TODO: fix this, this should fail in its current state + let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); + println!("{code}"); assert_eq!(desired_output, code); Ok(()) @@ -3167,14 +3197,16 @@ output i; let input = String::from(""); let desired_output = String::from("123456789"); - let code = compile_program(program, Some(OPT_NONE_SPIRAL))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); Ok(()) } + // TODO: decipher this #[test] + #[ignore] fn spiral_memory_allocation_3() { let program = String::from( r#" @@ -3182,12 +3214,12 @@ cell a @2,4 = 1; cell[4] b @0,4; "#, ); - let code = compile_program(program, Some(OPT_NONE_SPIRAL)); + let code = compile_program::(program, Some(OPT_NONE_SPIRAL)); assert!(code.is_err()); assert!(code .unwrap_err() .to_string() - .contains("Location specifier @0,4 conflicts with another allocation")); + .contains("Location specifier @(0,4) conflicts with another allocation")); } #[test] @@ -3208,7 +3240,7 @@ output b[3]; output a; "#, ); - let code = compile_program(program, Some(OPT_NONE_SPIRAL))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); println!("{}", code); let input = String::from(""); let desired_output = String::from("12345"); diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index 1f4c819..f6cf204 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -1,7 +1,11 @@ -use 
regex_lite::Regex; - +// project dependencies: use crate::macros::macros::{r_assert, r_panic}; +// stdlib dependencies: + +// external dependencies: +use regex_lite::Regex; + pub fn tokenise(source: &String) -> Result, String> { let stripped = source .lines() From 81376e042a19532443ec69dac1a8deacab000016 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Thu, 16 Oct 2025 23:30:33 +1100 Subject: [PATCH 12/56] Change 2d location specifiers to be tuples --- compiler/src/backend.rs | 6 +- compiler/src/cells.rs | 21 +--- compiler/src/lib.rs | 2 +- compiler/src/main.rs | 2 +- compiler/src/parser.rs | 224 +++++++++++++++++++++++++------------- compiler/src/tests.rs | 114 +++++++++---------- compiler/src/tokeniser.rs | 2 - 7 files changed, 206 insertions(+), 165 deletions(-) diff --git a/compiler/src/backend.rs b/compiler/src/backend.rs index a21eda7..ac5988d 100644 --- a/compiler/src/backend.rs +++ b/compiler/src/backend.rs @@ -401,11 +401,7 @@ impl CellAllocator { //Check specified memory allocation above to ensure that this works nicely with all algorithms if let Some(l) = location { if !self.check_allocatable(&l, size) { - r_panic!( - "Location specifier @{0},{1} conflicts with another allocation", - l.0, - l.1 - ); + r_panic!("Location specifier @{l} conflicts with another allocation"); } } else { // should the region start at the current tape head? 
diff --git a/compiler/src/cells.rs b/compiler/src/cells.rs index 3aa5ef0..acb4bc3 100644 --- a/compiler/src/cells.rs +++ b/compiler/src/cells.rs @@ -1,6 +1,6 @@ use std::fmt::Display; -use crate::{parser::LocationSpecifier, tokeniser::Token}; +use crate::parser::TapeCellLocation; /// when making Brainfuck variants, for a cell location type, you must implement this trait /// for now this is implemented by TapeCell (1D location specifier), and TapeCell2D (2D) @@ -28,7 +28,7 @@ impl Display for TapeCell { impl Display for TapeCell2D { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("({},{})", self.0, self.1))?; + f.write_fmt(format_args!("({}, {})", self.0, self.1))?; Ok(()) } } @@ -47,20 +47,3 @@ impl TapeOrigin for TapeCell2D { TapeCell2D(0, 0) } } - -pub trait TapeCellLocation -where - Self: Sized + Display, -{ - /// parse any memory location specifiers - /// let g @(4,2) = 68; - /// or - /// let p @3 = 68; - fn parse_location_specifier( - tokens: &[Token], - ) -> Result<(LocationSpecifier, usize), String>; - - /// safely cast a 2D or 1D location specifier into a 1D non-negative cell offset, - /// for use with struct fields - fn to_positive_cell_offset(&self) -> Result; -} diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 2942ed2..e6fadcb 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] // dead code is allowed because we have two different compile targets (wasm and command-line) -// crate dependencies: +// project dependencies: mod backend; mod brainfuck; mod brainfuck_optimiser; diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 1caac03..380ed17 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -1,7 +1,7 @@ #![allow(dead_code)] // dead code is allowed because we have two different compile targets (wasm and command-line) -// crate dependencies: +// project dependencies: mod backend; mod brainfuck; mod brainfuck_optimiser; diff --git 
a/compiler/src/parser.rs b/compiler/src/parser.rs index d1ee930..8ef052b 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -1,8 +1,11 @@ +// project dependencies: use crate::{ - cells::{TapeCell, TapeCell2D, TapeCellLocation}, + cells::{TapeCell, TapeCell2D}, macros::macros::{r_assert, r_panic}, tokeniser::Token, }; + +// stdlib dependencies use std::{fmt::Display, mem::discriminant, num::Wrapping}; /// recursive function to create a tree representation of the program @@ -478,6 +481,77 @@ fn parse_assert_clause(clause: &[Token]) -> Result, String> { } } +fn parse_integer(tokens: &[Token]) -> Result<(i32, usize), String> { + let mut i = 0; + let mut positive = true; + if let Token::Minus = &tokens[i] { + i += 1; + positive = false; + } + + let Token::Digits(digits) = &tokens[i] else { + r_panic!("Expected number after \"-\" in integer"); + }; + i += 1; + + // TODO: error handling + let offset = digits.parse::().unwrap(); + Ok((if positive { offset } else { -offset }, i)) +} + +fn parse_integer_tuple( + tokens: &[Token], +) -> Result<([i32; LENGTH], usize), String> { + let mut i = 0; + r_assert!( + matches!(&tokens[i], Token::OpenParenthesis), + "Expected opening parenthesis in tuple: {:?}", + &tokens[0..(tokens.len().min(5))] + ); + i += 1; + + let mut tuple = [0; LENGTH]; + for (j, element) in tuple.iter_mut().enumerate() { + let (offset, len) = parse_integer(&tokens[i..])?; + i += len; + *element = offset; + + if j < LENGTH - 1 { + r_assert!( + matches!(&tokens[i], Token::Comma), + "Expected comma in tuple: {:?}", + &tokens[0..(tokens.len().min(5))] + ); + i += 1; + } + } + r_assert!( + matches!(&tokens[i], Token::ClosingParenthesis), + "Expected closing parenthesis in tuple: {:?}", + &tokens[0..(tokens.len().min(5))] + ); + i += 1; + + Ok((tuple, i)) +} + +pub trait TapeCellLocation +where + Self: Sized + Display, +{ + /// parse any memory location specifiers + /// let g @(4,2) = 68; + /// or + /// let p @3 = 68; + fn parse_location_specifier( + 
tokens: &[Token], + ) -> Result<(LocationSpecifier, usize), String>; + + /// safely cast a 2D or 1D location specifier into a 1D non-negative cell offset, + /// for use with struct fields + fn to_positive_cell_offset(&self) -> Result; +} + impl TapeCellLocation for TapeCell { fn parse_location_specifier( tokens: &[Token], @@ -490,38 +564,26 @@ impl TapeCellLocation for TapeCell { }; let mut i = 1; - match &tokens[i] { + let location_specifier = match &tokens[i] { Token::Minus | Token::Digits(_) => { - let mut positive = true; - if let Token::Minus = &tokens[i] { - i += 1; - positive = false; - } - - let Token::Digits(digits) = &tokens[i] else { - r_panic!("Expected number after \"-\" in location specifier: {tokens:#?}"); - }; - i += 1; - - // TODO: error handling - let offset = digits.parse::().unwrap(); - Ok(( - LocationSpecifier::Cell(TapeCell(if positive { offset } else { -offset })), - i, - )) + let (offset, len) = parse_integer(&tokens[i..])?; + i += len; + LocationSpecifier::Cell(TapeCell(offset)) } Token::Name(_) => { // variable location specifier let (var, len) = parse_var_target(&tokens[i..])?; i += len; - Ok((LocationSpecifier::Variable(var), i)) + LocationSpecifier::Variable(var) } _ => r_panic!( "Invalid location specifier: {:?}", &tokens[0..(tokens.len().min(5))] ), - } + }; + + Ok((location_specifier, i)) } fn to_positive_cell_offset(&self) -> Result { @@ -542,68 +604,31 @@ impl TapeCellLocation for TapeCell2D { }; let mut i = 1; - match &tokens[i] { - Token::Digits(_) | Token::Minus => { - let x_offset = { - let mut positive = true; - if let Token::Minus = &tokens[i] { - i += 1; - positive = false; - } - let Token::Digits(raw) = &tokens[i] else { - r_panic!( - "Expected number after \"-\" in memory location specifier: {tokens:#?}" - ); - }; - i += 1; - - // TODO: error handling - let offset = raw.parse::().unwrap(); - if positive { - offset - } else { - -offset - } - }; + let location_specifier = match &tokens[i] { + Token::OpenParenthesis => { + // 
parse a 2-tuple + let (tuple, len) = parse_integer_tuple::<2>(&tokens[i..])?; + i += len; - let y_offset = { - if let Token::Comma = &tokens[i] { - i += 1; - let mut positive = true; - if let Token::Minus = &tokens[i] { - i += 1; - positive = false; - } - let Token::Digits(raw) = &tokens[i] else { - r_panic!( - "Expected number after \"-\" in memory location specifier: {tokens:#?}" - ); - }; - i += 1; - - // TODO: error handling - let offset = raw.parse::().unwrap(); - if positive { - offset - } else { - -offset - } - } else { - 0 - } - }; + LocationSpecifier::Cell(TapeCell2D(tuple[0], tuple[1])) + } + Token::Digits(_) | Token::Minus => { + let (x_offset, len) = parse_integer(&tokens[i..])?; + i += len; - return Ok((LocationSpecifier::Cell(TapeCell2D(x_offset, y_offset)), i)); + LocationSpecifier::Cell(TapeCell2D(x_offset, 0)) } Token::Name(_) => { // variable location specifier let (var, len) = parse_var_target(&tokens[i..])?; i += len; - return Ok((LocationSpecifier::Variable(var), i)); + LocationSpecifier::Variable(var) } _ => r_panic!("Expected constant or variable in location specifier: {tokens:#?}"), - } + }; + + Ok((location_specifier, i)) } fn to_positive_cell_offset(&self) -> Result { @@ -1601,18 +1626,39 @@ mod parser_tests { }])) } + #[test] + fn var_v() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon + ]) + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { + var: VariableDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }])) + } + #[test] fn two_dimensional_1() { - let _ = parse::(&[ + assert!(parse::(&[ Token::Cell, Token::Name(String::from("x")), Token::At, + Token::OpenParenthesis, Token::Digits(String::from("0")), Token::Comma, Token::Digits(String::from("1")), + Token::ClosingParenthesis, Token::Semicolon, ]) - .expect_err(""); + .unwrap_err() + .contains("Invalid location specifier")); } #[test] @@ -1621,9 +1667,11 @@ 
mod parser_tests { Token::Cell, Token::Name(String::from("x")), Token::At, + Token::OpenParenthesis, Token::Digits(String::from("0")), Token::Comma, Token::Digits(String::from("1")), + Token::ClosingParenthesis, Token::Semicolon, ]) .unwrap() @@ -1636,4 +1684,30 @@ mod parser_tests { } }])); } + + #[test] + fn two_dimensional_3() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("xyz")), + Token::At, + Token::OpenParenthesis, + Token::Minus, + Token::Digits(String::from("10")), + Token::Comma, + Token::Minus, + Token::Digits(String::from("101")), + Token::ClosingParenthesis, + Token::Semicolon, + ]) + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { + var: VariableDefinition { + name: String::from("xyz"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::Cell(TapeCell2D(-10, -101)) + } + }])); + } } diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index c58a817..e4204f5 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -2278,12 +2278,14 @@ cell foo @1 = 2; cell b = 3; "#, ); - let code = compile_program::(program, None); - assert!(code.is_err()); - assert!(code + // assert_eq!( + // compile_program::(program, None).unwrap_err(), + // "Location specifier @1 conflicts with another allocation" + // ); + // TODO: fix the need for this + assert!(compile_program::(program, None) .unwrap_err() - .to_string() - .contains("Location specifier @1,0 conflicts with another allocation")); + .contains("conflicts with another allocation")); } #[test] @@ -2865,15 +2867,15 @@ output a + 3; fn memory_specifiers_2d_1() -> Result<(), String> { let program = String::from( r#" -cell a @1,2 = 1; +cell a @(1, 2) = 1; cell foo @0 = 2; cell b = 3; "#, ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - assert!(code.starts_with(">^^++++")); + assert_eq!( + compile_program::(program, None)?.to_string(), + ">^^++++" + ); Ok(()) } @@ -2881,7 +2883,7 @@ cell b = 3; fn 
memory_specifiers_2d_2() -> Result<(), String> { let program = String::from( r#" -cell[4][3] g @1,2; +cell[4][3] g @(1, 2); g[0][0] = 1; g[1][1] = 2; g[2][2] = 3; @@ -2889,10 +2891,10 @@ cell foo @0 = 2; cell b = 3; "#, ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - assert!(code.starts_with(">^^[-]+>>>>>[-]++>>>>>[-]+++<<<<<<<<<<+++")); + assert_eq!( + compile_program::(program, None)?.to_string(), + ">^^[-]+>>>>>[-]++>>>>>[-]+++<<<<<<<<<<+++" + ); Ok(()) } @@ -2900,17 +2902,15 @@ cell b = 3; fn memory_specifiers_2d_3() { let program = String::from( r#" -cell a @1,3 = 1; -cell foo @1,3 = 2; +cell a @(1, 3) = 1; +cell foo @(1, 3) = 2; cell b = 3; "#, ); - let code = compile_program::(program, None); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @1,3 conflicts with another allocation")); + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Location specifier @(1, 3) conflicts with another allocation" + ); } #[test] @@ -2918,32 +2918,28 @@ cell b = 3; let program = String::from( r#" cell a @2 = 1; -cell foo @2,0 = 2; +cell foo @(2, 0) = 2; cell b = 3; "#, ); - let code = compile_program::(program, None); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @2,0 conflicts with another allocation")); + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Location specifier @(2, 0) conflicts with another allocation" + ); } #[test] fn memory_specifiers_2d_5() { let program = String::from( r#" -cell a @2,4 = 1; -cell[4] b @0,4; +cell a @(2, 4) = 1; +cell[4] b @(0, 4); "#, ); - let code = compile_program::(program, None); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @0,4 conflicts with another allocation")); + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Location specifier @(0, 4) conflicts with another allocation" + ); } 
#[test] @@ -3008,16 +3004,14 @@ output i; fn tiles_memory_allocation_3() { let program = String::from( r#" -cell a @2,4 = 1; -cell[4] b @0,4; +cell a @(2, 4) = 1; +cell[4] b @(0, 4); "#, ); - let code = compile_program::(program, Some(OPT_NONE_TILES)); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @0,4 conflicts with another allocation")); + assert_eq!( + compile_program::(program, Some(OPT_NONE_TILES)).unwrap_err(), + "Location specifier @(0, 4) conflicts with another allocation" + ); } #[test] @@ -3108,16 +3102,14 @@ output i; fn zig_zag_memory_allocation_3() { let program = String::from( r#" -cell a @2,4 = 1; -cell[4] b @0,4; +cell a @(2, 4) = 1; +cell[4] b @(0, 4); "#, ); - let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG)); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @(0, 4) conflicts with another allocation")); + assert_eq!( + compile_program::(program, Some(OPT_NONE_ZIG_ZAG)).unwrap_err(), + "Location specifier @(0, 4) conflicts with another allocation" + ); } #[test] @@ -3210,16 +3202,14 @@ output i; fn spiral_memory_allocation_3() { let program = String::from( r#" -cell a @2,4 = 1; -cell[4] b @0,4; +cell a @(2, 4) = 1; +cell[4] b @(0, 4); "#, ); - let code = compile_program::(program, Some(OPT_NONE_SPIRAL)); - assert!(code.is_err()); - assert!(code - .unwrap_err() - .to_string() - .contains("Location specifier @(0,4) conflicts with another allocation")); + assert_eq!( + compile_program::(program, Some(OPT_NONE_SPIRAL)).unwrap_err(), + "Location specifier @(0,4) conflicts with another allocation" + ); } #[test] diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index f6cf204..a767b41 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -1,8 +1,6 @@ // project dependencies: use crate::macros::macros::{r_assert, r_panic}; -// stdlib dependencies: - // external dependencies: use regex_lite::Regex; 
From a0ddd99866e3a196217dadfbb54d0e33b152bf3e Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Fri, 17 Oct 2025 11:29:51 +1100 Subject: [PATCH 13/56] Tweak extended opcode enum --- compiler/src/backend.rs | 28 +++++----- compiler/src/frontend.rs | 27 ++++------ compiler/src/parser.rs | 108 ++++++++++++++++++++------------------ compiler/src/tokeniser.rs | 2 + 4 files changed, 83 insertions(+), 82 deletions(-) diff --git a/compiler/src/backend.rs b/compiler/src/backend.rs index ac5988d..2efb4b6 100644 --- a/compiler/src/backend.rs +++ b/compiler/src/backend.rs @@ -577,22 +577,22 @@ impl CellAllocator { } } -// #[derive(Clone, Copy, Debug)] -// pub enum Opcode { -// Add, -// Subtract, -// Right, -// Left, -// OpenLoop, -// CloseLoop, -// Output, -// Input, -// Clear, -// Up, -// Down, -// } +#[derive(Clone, Copy, Debug)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Opcode { + Add, + Subtract, + Right, + Left, + OpenLoop, + CloseLoop, + Output, + Input, + Clear, +} #[derive(Clone, Copy, Debug)] +#[cfg_attr(test, derive(PartialEq))] pub enum Opcode2D { Add, Subtract, diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index ba008df..785cb6d 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -23,7 +23,7 @@ impl Into for TapeCell { impl MastermindContext { pub fn create_ir_scope<'a, TC: 'static + TapeCellVariant + Into>( &self, - clauses: &[Clause], + clauses: &[Clause], outer_scope: Option<&'a ScopeBuilder>, ) -> Result, String> { let mut scope = if let Some(outer) = outer_scope { @@ -34,7 +34,7 @@ impl MastermindContext { // TODO: fix unnecessary clones, and reimplement this with iterators somehow // hoist structs, then functions to top - let mut filtered_clauses_1: Vec> = vec![]; + let mut filtered_clauses_1 = vec![]; // first stage: structs (these need to be defined before functions, so they can be used as arguments) for clause in clauses { match clause { @@ -46,7 +46,7 @@ impl MastermindContext { } } // second stage: functions 
- let mut filtered_clauses_2: Vec> = vec![]; + let mut filtered_clauses_2 = vec![]; for clause in filtered_clauses_1 { match clause { Clause::DefineFunction { @@ -509,16 +509,7 @@ impl MastermindContext { self.ir_to_bf(instructions, Some(TC::origin_cell()))?; expanded_bf.extend(bf_code); } - ExtendedOpcode::Add => expanded_bf.push(Opcode2D::Add), - ExtendedOpcode::Subtract => expanded_bf.push(Opcode2D::Subtract), - ExtendedOpcode::Right => expanded_bf.push(Opcode2D::Right), - ExtendedOpcode::Left => expanded_bf.push(Opcode2D::Left), - ExtendedOpcode::OpenLoop => expanded_bf.push(Opcode2D::OpenLoop), - ExtendedOpcode::CloseLoop => expanded_bf.push(Opcode2D::CloseLoop), - ExtendedOpcode::Output => expanded_bf.push(Opcode2D::Output), - ExtendedOpcode::Input => expanded_bf.push(Opcode2D::Input), - ExtendedOpcode::Up => expanded_bf.push(Opcode2D::Up), - ExtendedOpcode::Down => expanded_bf.push(Opcode2D::Down), + ExtendedOpcode::Opcode(opcode) => expanded_bf.push(opcode), } } @@ -708,7 +699,11 @@ pub struct ScopeBuilder<'a, TapeCell> { variable_memory: HashMap, /// Functions accessible by any code within or in the current scope - functions: Vec<(String, Vec<(String, ValueType)>, Vec>)>, + functions: Vec<( + String, + Vec<(String, ValueType)>, + Vec>, + )>, /// Struct types definitions structs: HashMap, @@ -719,7 +714,7 @@ pub struct ScopeBuilder<'a, TapeCell> { #[derive(Clone, Debug)] // probably shouldn't be cloning here but whatever struct Function { arguments: Vec<(String, ValueType)>, - block: Vec>, + block: Vec>, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -1075,7 +1070,7 @@ where &mut self, new_function_name: &str, new_arguments: Vec>, - new_block: Vec>, + new_block: Vec>, ) -> Result<(), String> { let absolute_arguments = new_arguments .into_iter() diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index 8ef052b..695125f 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -1,5 +1,6 @@ // project dependencies: use crate::{ + 
backend::Opcode2D, cells::{TapeCell, TapeCell2D}, macros::macros::{r_assert, r_panic}, tokeniser::Token, @@ -9,7 +10,7 @@ use crate::{ use std::{fmt::Display, mem::discriminant, num::Wrapping}; /// recursive function to create a tree representation of the program -pub fn parse(tokens: &[Token]) -> Result>, String> { +pub fn parse(tokens: &[Token]) -> Result>, String> { // basic steps: // chew off tokens from the front, recursively parse blocks of tokens let mut clauses = Vec::new(); @@ -113,7 +114,9 @@ pub fn parse(tokens: &[Token]) -> Result>, Ok(clauses) } -fn parse_let_clause(clause: &[Token]) -> Result, String> { +fn parse_let_clause( + clause: &[Token], +) -> Result, String> { // cell x = 0; // struct DummyStruct y let mut i = 0usize; @@ -137,7 +140,9 @@ fn parse_let_clause(clause: &[Token]) -> Result } /// Parse tokens representing a struct definition into a clause -fn parse_struct_clause(clause: &[Token]) -> Result, String> { +fn parse_struct_clause( + clause: &[Token], +) -> Result, String> { let mut i = 0usize; let Token::Struct = &clause[i] else { r_panic!("Expected struct keyword in struct clause. This should never occur. 
{clause:#?}"); @@ -185,8 +190,8 @@ fn parse_struct_clause(clause: &[Token]) -> Result(clause: &[Token]) -> Result>, String> { - let mut clauses: Vec> = Vec::new(); +fn parse_add_clause(clause: &[Token]) -> Result>, String> { + let mut clauses = Vec::new(); let mut i = 0usize; let (var, len) = parse_var_target(&clause[i..])?; i += len; @@ -220,7 +225,7 @@ fn parse_add_clause(clause: &[Token]) -> Result>, String> { } // currently just syntax sugar, should make it actually do post/pre increments -fn parse_increment_clause(clause: &[Token]) -> Result, String> { +fn parse_increment_clause(clause: &[Token]) -> Result, String> { let (var, _) = parse_var_target(&clause[2..])?; //An increment clause can never be self referencing since it just VAR++ Ok(match (&clause[0], &clause[1]) { @@ -241,9 +246,9 @@ fn parse_increment_clause(clause: &[Token]) -> Result, String> { // assumed that the final token is a semicolon } -fn parse_set_clause(clause: &[Token]) -> Result>, String> { +fn parse_set_clause(clause: &[Token]) -> Result>, String> { // TODO: what do we do about arrays and strings and structs? 
- let mut clauses: Vec> = Vec::new(); + let mut clauses = Vec::new(); let mut i = 0usize; let (var, len) = parse_var_target(&clause[i..])?; i += len; @@ -293,7 +298,7 @@ fn parse_set_clause(clause: &[Token]) -> Result>, String> { fn parse_drain_copy_clause( clause: &[Token], is_draining: bool, -) -> Result, String> { +) -> Result, String> { // drain g {i += 1;}; // drain g into j; // copy foo into bar {g += 2; etc;}; @@ -356,7 +361,9 @@ fn parse_drain_copy_clause( }) } -fn parse_while_clause(clause: &[Token]) -> Result, String> { +fn parse_while_clause( + clause: &[Token], +) -> Result, String> { // TODO: make this able to accept expressions let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -372,7 +379,9 @@ fn parse_while_clause(clause: &[Token]) -> Result(clause: &[Token]) -> Result, String> { +fn parse_if_else_clause( + clause: &[Token], +) -> Result, String> { // skip first token, assumed to start with if let mut i = 1usize; let mut not = false; @@ -426,7 +435,7 @@ fn parse_if_else_clause(clause: &[Token]) -> Result(clause: &[Token]) -> Result, String> { +fn parse_output_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let expr_tokens = &clause[i..(clause.len() - 1)]; @@ -440,7 +449,7 @@ fn parse_output_clause(clause: &[Token]) -> Result, String> { Ok(Clause::OutputValue { value: expr }) } -fn parse_input_clause(clause: &[Token]) -> Result, String> { +fn parse_input_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -453,7 +462,7 @@ fn parse_input_clause(clause: &[Token]) -> Result, String> { Ok(Clause::InputVariable { var }) } -fn parse_assert_clause(clause: &[Token]) -> Result, String> { +fn parse_assert_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -640,7 +649,9 @@ impl TapeCellLocation for TapeCell2D { } } -fn parse_brainfuck_clause(clause: &[Token]) -> Result, String> { +fn 
parse_brainfuck_clause( + clause: &[Token], +) -> Result, String> { // bf {++--<><} // bf @3 {++--<><} // bf clobbers var1 var2 {++--<><} @@ -678,19 +689,20 @@ fn parse_brainfuck_clause(clause: &[Token]) -> Result ops.push(ExtendedOpcode::Add), - Token::Minus => ops.push(ExtendedOpcode::Subtract), - Token::MoreThan => ops.push(ExtendedOpcode::Right), - Token::LessThan => ops.push(ExtendedOpcode::Left), - Token::Caret => ops.push(ExtendedOpcode::Up), - Token::OpenSquareBracket => ops.push(ExtendedOpcode::OpenLoop), - Token::ClosingSquareBracket => ops.push(ExtendedOpcode::CloseLoop), - Token::Dot => ops.push(ExtendedOpcode::Output), - Token::Comma => ops.push(ExtendedOpcode::Input), + Token::Plus => ops.push(ExtendedOpcode::Opcode(Opcode2D::Add)), + Token::Minus => ops.push(ExtendedOpcode::Opcode(Opcode2D::Subtract)), + Token::MoreThan => ops.push(ExtendedOpcode::Opcode(Opcode2D::Right)), + Token::LessThan => ops.push(ExtendedOpcode::Opcode(Opcode2D::Left)), + Token::OpenSquareBracket => ops.push(ExtendedOpcode::Opcode(Opcode2D::OpenLoop)), + Token::ClosingSquareBracket => ops.push(ExtendedOpcode::Opcode(Opcode2D::CloseLoop)), + Token::Dot => ops.push(ExtendedOpcode::Opcode(Opcode2D::Output)), + Token::Comma => ops.push(ExtendedOpcode::Opcode(Opcode2D::Input)), + // TODO: refactor this: + Token::Caret => ops.push(ExtendedOpcode::Opcode(Opcode2D::Up)), Token::Name(s) => { for c in s.chars() { if c == 'v' { - ops.push(ExtendedOpcode::Down); + ops.push(ExtendedOpcode::Opcode(Opcode2D::Down)); } else { panic!("Invalid Inline Brainfuck Characters in {s}"); } @@ -718,7 +730,7 @@ fn parse_brainfuck_clause(clause: &[Token]) -> Result( clause: &[Token], -) -> Result, String> { +) -> Result, String> { let mut i = 1usize; // function name let Token::Name(name) = &clause[i] else { @@ -768,7 +780,7 @@ fn parse_function_definition_clause( }) } -fn parse_function_call_clause(clause: &[Token]) -> Result, String> { +fn parse_function_call_clause(clause: &[Token]) -> Result, String> 
{ let mut i = 0usize; // Okay I didn't know this rust syntax, could have used it all over the place let Token::Name(name) = &clause[i] else { @@ -1299,11 +1311,12 @@ impl Sign { } } -/// Clause type with TC (tape cell) as a type variable -/// TC can be changed to implement 2D brainfuck, or other modifications +/// Clause type type variables: +/// - TC: TapeCell can be changed to implement 2D brainfuck, or other modifications +/// - OC: Opcode represents the valid Brainfuck Opcodes that we're generating (also used for 2D or other BF variants) #[derive(Debug, Clone)] #[cfg_attr(test, derive(PartialEq))] -pub enum Clause { +pub enum Clause { DeclareVariable { var: VariableDefinition, }, @@ -1334,12 +1347,12 @@ pub enum Clause { CopyLoop { source: Expression, targets: Vec, - block: Vec>, + block: Vec>, is_draining: bool, }, WhileLoop { var: VariableTarget, - block: Vec>, + block: Vec>, }, OutputValue { value: Expression, @@ -1351,7 +1364,7 @@ pub enum Clause { name: String, // TODO: fix the type here, as function definitions don't actually need location specifiers and therefore don't need a tape cell type arguments: Vec>, - block: Vec>, + block: Vec>, }, CallFunction { function_name: String, @@ -1359,32 +1372,23 @@ pub enum Clause { }, IfElse { condition: Expression, - if_block: Option>>, - else_block: Option>>, + if_block: Option>>, + else_block: Option>>, }, - Block(Vec>), + Block(Vec>), InlineBrainfuck { location_specifier: LocationSpecifier, clobbered_variables: Vec, - operations: Vec>, + operations: Vec>, }, } // extended brainfuck opcodes to include mastermind code blocks #[derive(Debug, Clone)] #[cfg_attr(test, derive(PartialEq))] -pub enum ExtendedOpcode { - Add, - Subtract, - Right, - Left, - OpenLoop, - CloseLoop, - Output, - Input, - Block(Vec>), - Up, - Down, +pub enum ExtendedOpcode { + Opcode(OC), + Block(Vec>), } #[derive(Debug, Clone, Hash, PartialEq, Eq)] @@ -1563,13 +1567,13 @@ impl Display for VariableTarget { #[cfg(test)] mod parser_tests { - use 
crate::cells::TapeCell; + use crate::{backend::Opcode2D, cells::TapeCell}; use super::*; #[test] fn parse_if_1() { - assert!(parse(&[ + assert!(parse::(&[ // if true {{}} Token::If, Token::True, @@ -1582,7 +1586,7 @@ mod parser_tests { .iter() .eq(&[Clause::IfElse { condition: Expression::NaturalNumber(1), - if_block: Some(vec![Clause::::Block(vec![])]), + if_block: Some(vec![Clause::::Block(vec![])]), else_block: None, }])); } diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index a767b41..3eed4fc 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -4,6 +4,8 @@ use crate::macros::macros::{r_assert, r_panic}; // external dependencies: use regex_lite::Regex; +// TODO: refactor: combine tokeniser and parser into one +// make the inline brainfuck tokens contextual pub fn tokenise(source: &String) -> Result, String> { let stripped = source .lines() From 7665aed37242d108ec55788a06645ae84a3aff83 Mon Sep 17 00:00:00 2001 From: Missing Date: Sat, 18 Oct 2025 12:07:04 -0500 Subject: [PATCH 14/56] added thing --- programs/stack | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 programs/stack diff --git a/programs/stack b/programs/stack new file mode 100644 index 0000000..376455a --- /dev/null +++ b/programs/stack @@ -0,0 +1,59 @@ +// NOTE: STACK OVERFLOWS/UNDERFLOWS WILL FUCK UP YOUR PROGRAM +// Also, this has inline brainfuck in it, so it probably won't be readable. + +// This hasn't been tested very much because debugging brainfuck +// is hell. + +struct stack16 { + cell start @0; + cell[32] arr @1; // each entry uses 2 cells. yes this is stupid. 
+ cell one @33; + cell[2] end @34; +} + +fn init_stack(struct stack16 stack) { + stack.one = 1; +} + +fn get_to_current(struct stack16 stack) { + bf @stack.one { + [<<]>>> + } +} + +fn get_back_to_one() { + bf { + <[>>]<< + } +} + +fn push(struct stack16 stack, cell x) { + stack.one = 1; + cell in = x; + bf @stack.one { + [<<]+[>>]<< + } + while in { + in -= 1; + get_to_current(stack); + bf {+} + get_back_to_one(); + } +} + +fn pop(struct stack16 stack, cell return) { + stack.one = 1; + bf @return {[-]} // The compiler doesn't want to clear return + get_to_current(stack); + bf {[-} + get_back_to_one(); + return += 1; + get_to_current(stack); + bf {]<->>>} + get_back_to_one(); +} + + + + + From 64fd40509c536da170a0c1a3fa335d251ae2f130 Mon Sep 17 00:00:00 2001 From: Missing Date: Sat, 18 Oct 2025 12:27:21 -0500 Subject: [PATCH 15/56] update also oops wrong folder --- programs/std/stack | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 programs/std/stack diff --git a/programs/std/stack b/programs/std/stack new file mode 100644 index 0000000..376455a --- /dev/null +++ b/programs/std/stack @@ -0,0 +1,59 @@ +// NOTE: STACK OVERFLOWS/UNDERFLOWS WILL FUCK UP YOUR PROGRAM +// Also, this has inline brainfuck in it, so it probably won't be readable. + +// This hasn't been tested very much because debugging brainfuck +// is hell. + +struct stack16 { + cell start @0; + cell[32] arr @1; // each entry uses 2 cells. yes this is stupid. 
+ cell one @33; + cell[2] end @34; +} + +fn init_stack(struct stack16 stack) { + stack.one = 1; +} + +fn get_to_current(struct stack16 stack) { + bf @stack.one { + [<<]>>> + } +} + +fn get_back_to_one() { + bf { + <[>>]<< + } +} + +fn push(struct stack16 stack, cell x) { + stack.one = 1; + cell in = x; + bf @stack.one { + [<<]+[>>]<< + } + while in { + in -= 1; + get_to_current(stack); + bf {+} + get_back_to_one(); + } +} + +fn pop(struct stack16 stack, cell return) { + stack.one = 1; + bf @return {[-]} // The compiler doesn't want to clear return + get_to_current(stack); + bf {[-} + get_back_to_one(); + return += 1; + get_to_current(stack); + bf {]<->>>} + get_back_to_one(); +} + + + + + From fb1f4eba6c4aa0702133a90666a130cc3f4fbc8e Mon Sep 17 00:00:00 2001 From: Missing Date: Sat, 18 Oct 2025 12:28:50 -0500 Subject: [PATCH 16/56] Delete programs/stack --- programs/stack | 59 -------------------------------------------------- 1 file changed, 59 deletions(-) delete mode 100644 programs/stack diff --git a/programs/stack b/programs/stack deleted file mode 100644 index 376455a..0000000 --- a/programs/stack +++ /dev/null @@ -1,59 +0,0 @@ -// NOTE: STACK OVERFLOWS/UNDERFLOWS WILL FUCK UP YOUR PROGRAM -// Also, this has inline brainfuck in it, so it probably won't be readable. - -// This hasn't been tested very much because debugging brainfuck -// is hell. - -struct stack16 { - cell start @0; - cell[32] arr @1; // each entry uses 2 cells. yes this is stupid. 
- cell one @33; - cell[2] end @34; -} - -fn init_stack(struct stack16 stack) { - stack.one = 1; -} - -fn get_to_current(struct stack16 stack) { - bf @stack.one { - [<<]>>> - } -} - -fn get_back_to_one() { - bf { - <[>>]<< - } -} - -fn push(struct stack16 stack, cell x) { - stack.one = 1; - cell in = x; - bf @stack.one { - [<<]+[>>]<< - } - while in { - in -= 1; - get_to_current(stack); - bf {+} - get_back_to_one(); - } -} - -fn pop(struct stack16 stack, cell return) { - stack.one = 1; - bf @return {[-]} // The compiler doesn't want to clear return - get_to_current(stack); - bf {[-} - get_back_to_one(); - return += 1; - get_to_current(stack); - bf {]<->>>} - get_back_to_one(); -} - - - - - From 9b21954b85de14ce6c30eb4362f25ccfad880b71 Mon Sep 17 00:00:00 2001 From: Missing Date: Sun, 19 Oct 2025 22:22:25 -0500 Subject: [PATCH 17/56] updated the thing --- programs/std/stack | 110 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 23 deletions(-) diff --git a/programs/std/stack b/programs/std/stack index 376455a..b9a7ef6 100644 --- a/programs/std/stack +++ b/programs/std/stack @@ -1,59 +1,123 @@ +// stack program. by MSMissing on github + // NOTE: STACK OVERFLOWS/UNDERFLOWS WILL FUCK UP YOUR PROGRAM // Also, this has inline brainfuck in it, so it probably won't be readable. -// This hasn't been tested very much because debugging brainfuck -// is hell. - -struct stack16 { +struct stack255 { cell start @0; - cell[32] arr @1; // each entry uses 2 cells. yes this is stupid. - cell one @33; - cell[2] end @34; + cell[510] arr @1; + cell one @511; + cell len @512; + cell end @513; } -fn init_stack(struct stack16 stack) { +// == DOCUMENTATION == + +// struct stack255 -- a stack object you can push to and pop from. +// can hold up to 255 values. + +// push(stack, x) -- pushes x to the stack + +// push_d(stack, x) -- pushes x to the stack, destroying it in the +// process. 
faster than push + +// push_safe(stack, x) -- pushes x to the stack, and outputs an error if +// if it can't. cannot end the program. + +// push_d_safe(stack, x) -- same as push_safe, but destructive + +// pop(stack, out) -- pulls the last value from the stack and puts it +// into out. + +// pop_safe(stack, out) -- same as pop, but outputs an error if the stack +// underflows. cannot end the program. + + + +// REMEMBER TO INITIALIZE YOUR STACKS OR THE CODE WILL LIKELY BE SLOWER +fn init_stack(struct stack255 stack) { stack.one = 1; } -fn get_to_current(struct stack16 stack) { - bf @stack.one { +fn move_to_current(cell one) { // move the pointer to the last value pushe + bf @one { // input is stack.one [<<]>>> } } - -fn get_back_to_one() { +fn move_to_one() { // move pointer to stack.one after calling move_to_current() bf { <[>>]<< } } -fn push(struct stack16 stack, cell x) { +fn push_d(struct stack255 stack, cell in) { // destructive version of push stack.one = 1; - cell in = x; + stack.len += 1; + + // set usage flag bf @stack.one { [<<]+[>>]<< } + + // drain input to the array item while in { in -= 1; - get_to_current(stack); + move_to_current(stack.one); bf {+} - get_back_to_one(); + move_to_one(); } } -fn pop(struct stack16 stack, cell return) { +fn push(struct stack255 stack, cell x) { + cell in = x; // copy the input in case they want to keep it + push_d(stack, in); +} + +fn push_safe(struct stack255 stack, cell x) { + if stack.len - 255 { + push(stack, x); + } else { + output "ERROR: STACK OVERFLOW"; + } +} + +fn push_d_safe(struct stack255 stack, cell in) { + if stack.len - 255 { + push_d(stack, in); + } else { + in = 0; + output "ERROR: STACK OVERFLOW"; + } +} + +fn pop(struct stack255 stack, cell return) { + assert return unknown; // sometimes the compiler fucks up + return = 0; stack.one = 1; - bf @return {[-]} // The compiler doesn't want to clear return - get_to_current(stack); - bf {[-} - get_back_to_one(); + stack.len -= 1; + + 
move_to_current(stack.one); + bf @stack.one {[-} + move_to_one(); return += 1; - get_to_current(stack); + move_to_current(stack.one); bf {]<->>>} - get_back_to_one(); + move_to_one(); +} + +fn pop_safe(struct stack255 stack, cell return) { + if stack.len { + pop(stack, return); + } else { + return = 0; + output "ERROR: STACK UNDERFLOW"; + } } + + + From 1eefbf32a1dfca5fa4501b828d8cacb446f78479 Mon Sep 17 00:00:00 2001 From: Missing Date: Sun, 19 Oct 2025 23:30:53 -0500 Subject: [PATCH 18/56] Added stack32 data type stack32 is just a smaller stack255 --- programs/std/stack | 101 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 87 insertions(+), 14 deletions(-) diff --git a/programs/std/stack b/programs/std/stack index b9a7ef6..3981f18 100644 --- a/programs/std/stack +++ b/programs/std/stack @@ -4,11 +4,17 @@ // Also, this has inline brainfuck in it, so it probably won't be readable. struct stack255 { - cell start @0; - cell[510] arr @1; - cell one @511; - cell len @512; - cell end @513; + cell[510] arr; + cell one; + cell len; + cell end; +} + +struct stack32 { + cell[64] arr; + cell one; + cell len; + cell end; } // == DOCUMENTATION == @@ -16,6 +22,8 @@ struct stack255 { // struct stack255 -- a stack object you can push to and pop from. // can hold up to 255 values. +// struct stack32 -- smaller stack. holds 32 values. 
+ // push(stack, x) -- pushes x to the stack // push_d(stack, x) -- pushes x to the stack, destroying it in the @@ -39,8 +47,12 @@ fn init_stack(struct stack255 stack) { stack.one = 1; } +fn init_stack(struct stack32 stack) { + stack.one = 1; +} + fn move_to_current(cell one) { // move the pointer to the last value pushe - bf @one { // input is stack.one + bf @one { // input is stack.one [<<]>>> } } @@ -55,7 +67,25 @@ fn push_d(struct stack255 stack, cell in) { // destructive version of push stack.len += 1; // set usage flag - bf @stack.one { + bf @stack.one clobbers *stack.arr { + [<<]+[>>]<< + } + + // drain input to the array item + while in { + in -= 1; + move_to_current(stack.one); + bf clobbers *stack.arr {+} + move_to_one(); + } +} + +fn push_d(struct stack32 stack, cell in) { + stack.one = 1; + stack.len += 1; + + // set usage flag + bf @stack.one clobbers *stack.arr { [<<]+[>>]<< } @@ -63,21 +93,34 @@ fn push_d(struct stack255 stack, cell in) { // destructive version of push while in { in -= 1; move_to_current(stack.one); - bf {+} + bf clobbers *stack.arr {+} move_to_one(); } } fn push(struct stack255 stack, cell x) { - cell in = x; // copy the input in case they want to keep it + cell in = x; // copy the input push_d(stack, in); } +fn push(struct stack32 stack, cell x) { + cell in = x; + push_d(stack, x); +} + fn push_safe(struct stack255 stack, cell x) { if stack.len - 255 { push(stack, x); } else { - output "ERROR: STACK OVERFLOW"; + output "STACK OVERFLOW"; + } +} + +fn push_safe(struct stack32 stack, cell x) { + if stack.len - 32 { + push(stack, x); + } else { + output "STACK OVERFLOW"; } } @@ -86,18 +129,26 @@ fn push_d_safe(struct stack255 stack, cell in) { push_d(stack, in); } else { in = 0; - output "ERROR: STACK OVERFLOW"; + output "STACK OVERFLOW"; + } +} + +fn push_d_safe(struct stack32 stack, cell in) { + if stack.len - 32 { + push_d(stack, in); + } else { + in = 0; + output "STACK OVERFLOW"; } } fn pop(struct stack255 stack, cell return) { - 
assert return unknown; // sometimes the compiler fucks up return = 0; stack.one = 1; stack.len -= 1; move_to_current(stack.one); - bf @stack.one {[-} + bf clobbers *stack.arr return {[-} move_to_one(); return += 1; move_to_current(stack.one); @@ -105,15 +156,37 @@ fn pop(struct stack255 stack, cell return) { move_to_one(); } +fn pop(struct stack32 stack, cell return) { + return = 0; + stack.one = 1; + stack.len -= 1; + + move_to_current(stack.one); + bf clobbers *stack.arr return {[-} + move_to_one(); + return += 1; + move_to_current(stack.one); + bf clobbers *stack.arr {]<->>>} + move_to_one(); +} + fn pop_safe(struct stack255 stack, cell return) { if stack.len { pop(stack, return); } else { return = 0; - output "ERROR: STACK UNDERFLOW"; + output "STACK UNDERFLOW"; } } +fn pop_safe(struct stack32 stack, cell return) { + if stack.len { + pop(stack, return); + } else { + return = 0; + output "STACK UNDERFLOW"; + } +} From fec786ac19377994c5e36aa6812f87adf5b7a9f2 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Wed, 22 Oct 2025 17:31:12 +1100 Subject: [PATCH 19/56] WIP: make backend generic and implement variants need to fix lifetimes --- compiler/src/backend.rs | 748 ---------------------------- compiler/src/backend/bf.rs | 251 ++++++++++ compiler/src/backend/bf2d.rs | 401 +++++++++++++++ compiler/src/backend/common.rs | 469 +++++++++++++++++ compiler/src/backend/mod.rs | 4 + compiler/src/brainfuck.rs | 2 +- compiler/src/brainfuck_optimiser.rs | 75 +-- compiler/src/cells.rs | 49 -- compiler/src/constants_optimiser.rs | 14 +- compiler/src/frontend.rs | 64 ++- compiler/src/lib.rs | 31 +- compiler/src/main.rs | 7 +- compiler/src/parser.rs | 257 +++++++--- compiler/src/tests.rs | 294 ++++++----- 14 files changed, 1554 insertions(+), 1112 deletions(-) delete mode 100644 compiler/src/backend.rs create mode 100644 compiler/src/backend/bf.rs create mode 100644 compiler/src/backend/bf2d.rs create mode 100644 compiler/src/backend/common.rs create mode 100644 
compiler/src/backend/mod.rs delete mode 100644 compiler/src/cells.rs diff --git a/compiler/src/backend.rs b/compiler/src/backend.rs deleted file mode 100644 index 2efb4b6..0000000 --- a/compiler/src/backend.rs +++ /dev/null @@ -1,748 +0,0 @@ -// turns low-level bf instructions into plain bf -// take in a timeline of cell allocations and move-to-cell operations, etc -// output plain bf according to that spec - -// this algorithm is responsible for actually allocating physical tape cells as opposed to the parser -// can introduce optimisations here with some kind of allocation timeline sorting algorithm (hard leetcode style problem) - -use std::{ - collections::{HashMap, HashSet}, - num::Wrapping, -}; - -use crate::{ - cells::TapeCell2D, - constants_optimiser::calculate_optimal_addition, - frontend::{CellLocation, Instruction, MemoryId}, - macros::macros::{r_assert, r_panic}, - misc::MastermindContext, -}; - -type LoopDepth = usize; -type TapeValue = u8; - -impl MastermindContext { - pub fn ir_to_bf + PartialEq>( - &self, - instructions: Vec>, - return_to_cell: Option, - ) -> Result, String> { - let mut allocator = CellAllocator::new(); - let mut alloc_map: HashMap< - MemoryId, - (TapeCell2D, usize, LoopDepth, Vec>), - > = HashMap::new(); - - let mut loop_stack: Vec = Vec::new(); - let mut current_loop_depth: LoopDepth = 0; - let mut skipped_loop_depth: Option = None; - let mut ops = BFBuilder2D::new(); - - for instruction in instructions { - if let Some(depth) = skipped_loop_depth { - // current loop is being skipped because of unreachable loop optimisations - match instruction { - Instruction::OpenLoop(_) => { - current_loop_depth += 1; - } - Instruction::CloseLoop(_) => { - current_loop_depth -= 1; - if current_loop_depth == depth { - skipped_loop_depth = None; - } - } - _ => (), - } - continue; - } - match instruction { - // the ids (indices really) given by the compiler are guaranteed to be unique (at the time of writing) - // however they will absolutely not be 
very efficient if used directly as cell locations - Instruction::Allocate(memory, location_specifier) => { - let cell = allocator.allocate( - location_specifier.map(|c| c.into()), - memory.len(), - self.config.memory_allocation_method, - )?; - let None = alloc_map.insert( - memory.id(), - ( - cell, - memory.len(), - current_loop_depth, - vec![Some(0); memory.len()], - ), - ) else { - r_panic!("Attempted to reallocate memory {memory:#?}"); - }; - } - Instruction::AssertCellValue(cell_obj, imm) => { - let Some((_cell_base, size, alloc_loop_depth, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!( - "Attempted to assert value of cell {cell_obj:#?} \ -which could not be found" - ); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let known_value = &mut known_values[mem_idx]; - - // allow the user to assert that we don't know the value of the cell by clobbering when we do inline brainfuck - if *alloc_loop_depth == current_loop_depth || imm.is_none() { - *known_value = imm; - } else { - r_panic!( - "Cannot assert cell {cell_obj:#?} value \ -outside of loop it was allocated" - ); - } - } - Instruction::Free(id) => { - // TODO: do I need to check alloc loop depth here? Or are cells never freed in an inner scope? 
- // think about this in regards to reusing cell space when a cell isn't being used - let Some((cell, size, _alloc_loop_depth, known_values)) = alloc_map.remove(&id) - else { - r_panic!("Attempted to free memory id {id} which could not be found"); - }; - - let None = known_values - .into_iter() - .find_map(|known_value| (known_value.unwrap_or(1) != 0).then_some(())) - else { - r_panic!( - "Attempted to free memory id {id} which has unknown or non-zero values" - ); - }; - - allocator.free(cell.into(), size)?; - } - Instruction::OpenLoop(cell_obj) => { - let Some((cell_base, size, alloc_loop_depth, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!( - "Attempted to open loop at cell {cell_obj:#?} which could not be found" - ); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); - let known_value = &mut known_values[mem_idx]; - - let mut open = true; - - if let Some(known_value) = known_value { - if *alloc_loop_depth == current_loop_depth - && *known_value == 0 && self.config.optimise_unreachable_loops - { - open = false; - skipped_loop_depth = Some(current_loop_depth); - current_loop_depth += 1; - } - } - - // skip the loop if the optimisations are turned on and we know the value is 0 - if open { - ops.move_to_cell(cell); - ops.push(Opcode2D::OpenLoop); - loop_stack.push(cell); - current_loop_depth += 1; - } - } - Instruction::CloseLoop(cell_obj) => { - let Some((cell_base, size, alloc_loop_depth, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!( - "Attempted to close loop at cell {cell_obj:#?} which could not be found" - ); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); - let known_value = &mut 
known_values[mem_idx]; - - let Some(stack_cell) = loop_stack.pop() else { - r_panic!("Attempted to close un-opened loop"); - }; - r_assert!(cell == stack_cell, "Attempted to close a loop unbalanced"); - - current_loop_depth -= 1; - - ops.move_to_cell(cell); - ops.push(Opcode2D::CloseLoop); - - // if a loop finishes on a cell then it is guaranteed to be 0 based on brainfuck itself - // I did encounter issues with nested loops here, interesting - if current_loop_depth == *alloc_loop_depth { - *known_value = Some(0); - } - } - Instruction::AddToCell(cell_obj, imm) => { - let Some((cell_base, size, alloc_loop_depth, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!("Attempted to add to cell {cell_obj:#?} which could not be found"); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); - let known_value = &mut known_values[mem_idx]; - - // TODO: fix bug, if only one multiplication then we can have a value already in the cell, but never otherwise - - // not sure if these optimisations should be in the builder step or in the compiler - if self.config.optimise_constants { - // ops.move_to_cell(&mut head_pos, cell); - // here we use an algorithm that finds the best combo of products and constants to make the number to minimise bf code - // first we get the closest allocated cell so we can calculate the distance cost of multiplying - // TODO: instead find the nearest zero cell, doesn't matter if allocated or not - let temp_cell = allocator.allocate_temp_cell(cell); - - let optimised_ops: BFBuilder2D = - calculate_optimal_addition(imm as i8, ops.head_pos, cell, temp_cell); - - ops.head_pos = optimised_ops.head_pos; - ops.extend(optimised_ops.opcodes); - - allocator.free(temp_cell, 1)?; - } else { - ops.move_to_cell(cell); - ops.add_to_current_cell(imm as i8); - } - - if imm != 0 { - if 
*alloc_loop_depth != current_loop_depth { - *known_value = None; - } else if let Some(known_value) = known_value { - *known_value = (Wrapping(*known_value) + Wrapping(imm)).0; - } - } - } - Instruction::InputToCell(cell_obj) => { - let Some((cell_base, size, _, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!( - "Attempted to input to cell {cell_obj:#?} which could not be found" - ); - }; - - // TODO: refactor this duplicate code (get_cell_safe or something like that) - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); - let known_value = &mut known_values[mem_idx]; - - ops.move_to_cell(cell); - ops.push(Opcode2D::Input); - // no way to know at compile time what the input to the program will be - *known_value = None; - } - // Instruction::AssertCellValue(id, value) => {} - Instruction::ClearCell(cell_obj) => { - let Some((cell_base, size, alloc_loop_depth, known_values)) = - alloc_map.get_mut(&cell_obj.memory_id) - else { - r_panic!("Attempted to clear cell {cell_obj:#?} which could not be found"); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); - let known_value = &mut known_values[mem_idx]; - - ops.move_to_cell(cell); - - let mut clear = true; - - if let Some(known_value) = known_value { - if self.config.optimise_cell_clearing - && *alloc_loop_depth == current_loop_depth - && (*known_value as i8).abs() < 4 - // not sure if this should be 4 or 3, essentially it depends on if we prefer clears or changes [-] vs ++--- - { - let imm = *known_value as i8; - if imm > 0 { - for _ in 0..imm { - ops.push(Opcode2D::Subtract); - } - } else if imm < 0 { - for _ in 0..-imm { - ops.push(Opcode2D::Add); - } - } - clear = false; - } - } - - if 
clear { - ops.push(Opcode2D::Clear); - } - - if *alloc_loop_depth == current_loop_depth { - *known_value = Some(0); - } else { - // TODO: fix this for if statements - *known_value = None; - } - } - Instruction::OutputCell(cell_obj) => { - let Some((cell_base, size, _, _)) = alloc_map.get(&cell_obj.memory_id) else { - r_panic!("Attempted to output cell {cell_obj:#?} which could not be found"); - }; - - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); - - ops.move_to_cell(cell); - ops.push(Opcode2D::Output); - } - Instruction::InsertBrainfuckAtCell(operations, location_specifier) => { - // move to the correct cell, based on the location specifier - match location_specifier { - CellLocation::FixedCell(cell) => ops.move_to_cell(cell.into()), - CellLocation::MemoryCell(cell_obj) => { - let Some((cell_base, size, _alloc_loop_depth, _known_values)) = - alloc_map.get(&cell_obj.memory_id) - else { - r_panic!("Attempted to use location of cell {cell_obj:#?} which could not be found"); - }; - let mem_idx = cell_obj.index.unwrap_or(0); - r_assert!( - mem_idx < *size, - "Attempted to access memory outside of allocation" - ); - let cell = TapeCell2D(cell_base.0 + mem_idx as i32, cell_base.1); - ops.move_to_cell(cell); - } - CellLocation::Unspecified => (), - } - - // paste the in-line BF operations - ops.extend(operations); - } - } - } - - // this is used in embedded brainfuck contexts to preserve head position - if let Some(origin_cell) = return_to_cell { - ops.move_to_cell(origin_cell.into()); - } - - Ok(ops.opcodes) - } -} - -struct CellAllocator { - alloc_map: HashSet, -} - -// allocator will not automatically allocate negative-index cells -// but users can -impl CellAllocator { - fn new() -> CellAllocator { - CellAllocator { - alloc_map: HashSet::new(), - } - } - - /// Checks if the memory size can be allocated to the right 
of a given location e.g. arrays - fn check_allocatable(&mut self, location: &TapeCell2D, size: usize) -> bool { - for k in 0..size { - if self - .alloc_map - .contains(&TapeCell2D(location.0 + k as i32, location.1)) - { - return false; - } - } - return true; - } - - /// Will either check a specific location can be allocated at the chosen size or if no location is - /// provided it will find a memory location where this size can be allocated - /// Uses a variety of memory allocation methods based on settings - fn allocate( - &mut self, - location: Option, - size: usize, - method: u8, - ) -> Result { - let mut region_start = location.unwrap_or(TapeCell2D(0, 0)); - //Check specified memory allocation above to ensure that this works nicely with all algorithms - if let Some(l) = location { - if !self.check_allocatable(&l, size) { - r_panic!("Location specifier @{l} conflicts with another allocation"); - } - } else { - // should the region start at the current tape head? - if method == 0 { - for i in region_start.0.. 
{ - if self.alloc_map.contains(&TapeCell2D(i, region_start.1)) { - region_start = TapeCell2D(i + 1, region_start.1); - } else if i - region_start.0 == (size as i32 - 1) { - break; - } - } - } else if method == 1 { - //Zig Zag - let mut found = false; - let mut loops = 0; - let mut i; - let mut j; - while !found { - i = region_start.0 + loops; - j = region_start.1; - for _ in 0..=loops { - if self.check_allocatable(&TapeCell2D(i, j), size) { - found = true; - region_start = TapeCell2D(i, j); - break; - } - i = i - 1; - j = j + 1; - } - loops += 1; - } - } else if method == 2 { - //Spiral - let mut found = false; - let mut loops = 1; - let directions = ['N', 'E', 'S', 'W']; - let mut i = region_start.0; - let mut j = region_start.1; - while !found { - for dir in directions { - match dir { - 'N' => { - for _ in 0..loops { - j += 1; - if self.check_allocatable(&TapeCell2D(i, j), size) { - found = true; - region_start = TapeCell2D(i, j); - break; - } - } - } - 'E' => { - for _ in 0..loops { - i += 1; - if self.check_allocatable(&TapeCell2D(i, j), size) { - found = true; - region_start = TapeCell2D(i, j); - break; - } - } - } - 'S' => { - for _ in 0..loops { - j -= 1; - if self.check_allocatable(&TapeCell2D(i, j), size) { - found = true; - region_start = TapeCell2D(i, j); - break; - } - } - } - 'W' => { - for _ in 0..loops { - i -= 1; - if self.check_allocatable(&TapeCell2D(i, j), size) { - found = true; - region_start = TapeCell2D(i, j); - break; - } - } - } - _ => {} - } - if found { - break; - } - } - if found { - break; - } - i -= 1; - j -= 1; - loops += 2; - } - } else if method == 3 { - //Tiles - let mut found = false; - let mut loops = 0; - while !found { - for i in -loops..=loops { - for j in -loops..=loops { - if self.check_allocatable( - &TapeCell2D(region_start.0 + i, region_start.1 + j), - size, - ) { - found = true; - region_start = TapeCell2D(region_start.0 + i, region_start.1 + j); - break; - } - } - if found { - break; - } - } - loops += 1; - } - } else { 
- panic!("Memory Allocation Method not implemented"); - } - } - - // make all cells in the specified region allocated - for i in region_start.0..(region_start.0 + size as i32) { - if !self.alloc_map.contains(&TapeCell2D(i, region_start.1)) { - self.alloc_map.insert(TapeCell2D(i, region_start.1)); - } - } - - Ok(region_start) - } - - // allocate but start looking close to the given cell, used for optimising constants as you need an extra cell to multiply - // again not sure if this stuff should be in the builder step or the compiler step ? This seems the simplest for now - // but I'm wary that complex systems often evolve from simple ones, and any optimisations introduce complexity - fn allocate_temp_cell(&mut self, location: TapeCell2D) -> TapeCell2D { - // this will allocate the given cell if unallocated so beware - if self.alloc_map.insert(location) { - return location; - } - - // alternate left then right, getting further and further out - // there is surely a nice one liner rusty iterator way of doing it but somehow this is clearer until I learn that - let mut left_iter = (0..location.0).rev(); - let mut right_iter = (location.0 + 1)..; - loop { - if let Some(i) = left_iter.next() { - // unallocated cell, allocate it and return - if self.alloc_map.insert(TapeCell2D(i, location.1)) { - return TapeCell2D(i, location.1); - } - } - - if let Some(i) = right_iter.next() { - if self.alloc_map.insert(TapeCell2D(i, location.1)) { - return TapeCell2D(i, location.1); - } - } - } - } - - fn free(&mut self, cell: TapeCell2D, size: usize) -> Result<(), String> { - for i in cell.0..(cell.0 + size as i32) { - r_assert!( - self.alloc_map.contains(&TapeCell2D(i, cell.1)), - "Cannot free cell @{0},{1} as it is not allocated.", - i, - cell.1 - ); - self.alloc_map.remove(&TapeCell2D(i, cell.1)); - } - - Ok(()) - } -} - -#[derive(Clone, Copy, Debug)] -#[cfg_attr(test, derive(PartialEq))] -pub enum Opcode { - Add, - Subtract, - Right, - Left, - OpenLoop, - CloseLoop, - Output, - 
Input, - Clear, -} - -#[derive(Clone, Copy, Debug)] -#[cfg_attr(test, derive(PartialEq))] -pub enum Opcode2D { - Add, - Subtract, - Right, - Left, - OpenLoop, - CloseLoop, - Output, - Input, - Clear, - Up, - Down, -} - -pub struct BFBuilder2D { - opcodes: Vec, - pub head_pos: TapeCell2D, -} - -pub trait BrainfuckOpcodes { - fn to_string(self) -> String; - fn from_str(s: &str) -> Self; -} - -impl BrainfuckOpcodes for Vec { - fn to_string(self) -> String { - let mut s = String::new(); - self.into_iter().for_each(|o| { - s.push_str(match o { - Opcode2D::Add => "+", - Opcode2D::Subtract => "-", - Opcode2D::Right => ">", - Opcode2D::Left => "<", - Opcode2D::OpenLoop => "[", - Opcode2D::CloseLoop => "]", - Opcode2D::Output => ".", - Opcode2D::Input => ",", - Opcode2D::Clear => "[-]", - Opcode2D::Up => "^", - Opcode2D::Down => "v", - }) - }); - s - } - - fn from_str(s: &str) -> Vec { - let mut ops = Vec::new(); - let mut i = 0; - while i < s.len() { - let substr = &s[i..]; - if substr.starts_with("[-]") { - ops.push(Opcode2D::Clear); - i += 3; - } else { - match substr.chars().next().unwrap() { - '+' => ops.push(Opcode2D::Add), - '-' => ops.push(Opcode2D::Subtract), - '>' => ops.push(Opcode2D::Right), - '<' => ops.push(Opcode2D::Left), - '[' => ops.push(Opcode2D::OpenLoop), - ']' => ops.push(Opcode2D::CloseLoop), - '.' 
=> ops.push(Opcode2D::Output), - ',' => ops.push(Opcode2D::Input), - '^' => ops.push(Opcode2D::Up), - 'v' => ops.push(Opcode2D::Down), - _ => (), // could put a little special opcode in for other characters - } - i += 1; - } - } - - ops - } -} - -impl BrainfuckOpcodes for BFBuilder2D { - fn to_string(self) -> String { - self.opcodes.to_string() - } - - fn from_str(s: &str) -> Self { - BFBuilder2D { - opcodes: Vec::from_str(s), - head_pos: TapeCell2D(0, 0), - } - } -} - -impl BFBuilder2D { - pub fn new() -> BFBuilder2D { - BFBuilder2D { - opcodes: Vec::new(), - head_pos: TapeCell2D(0, 0), - } - } - pub fn len(&self) -> usize { - self.opcodes.len() - } - pub fn push(&mut self, op: Opcode2D) { - self.opcodes.push(op); - } - pub fn extend(&mut self, ops: T) - where - T: IntoIterator, - { - self.opcodes.extend(ops); - } - pub fn move_to_cell(&mut self, cell: TapeCell2D) { - let x = cell.0; - let y = cell.1; - let x_pos = self.head_pos.0; - let y_pos = self.head_pos.1; - //Move x level - if x_pos < x { - for _ in x_pos..x { - self.opcodes.push(Opcode2D::Right); - } - } else if x < x_pos { - // theoretically equivalent to cell..head_pos? - for _ in ((x + 1)..=x_pos).rev() { - self.opcodes.push(Opcode2D::Left); - } - } - //Move y level - if y_pos < y { - for _ in y_pos..y { - self.opcodes.push(Opcode2D::Up); - } - } else if y < y_pos { - // theoretically equivalent to cell..head_pos? 
- for _ in ((y + 1)..=y_pos).rev() { - self.opcodes.push(Opcode2D::Down); - } - } - self.head_pos = cell; - } - - pub fn add_to_current_cell(&mut self, imm: i8) { - if imm > 0 { - for _ in 0..imm { - self.opcodes.push(Opcode2D::Add); - } - } else if imm < 0 { - // needs to be i32 because -(-128) = -128 in i8-land - for _ in 0..-(imm as i32) { - self.opcodes.push(Opcode2D::Subtract); - } - } - } -} - -#[cfg(test)] -mod backend_tests {} diff --git a/compiler/src/backend/bf.rs b/compiler/src/backend/bf.rs new file mode 100644 index 0000000..e8bf42e --- /dev/null +++ b/compiler/src/backend/bf.rs @@ -0,0 +1,251 @@ +use super::common::{ + BrainfuckBuilder, BrainfuckBuilderData, BrainfuckProgram, CellAllocator, CellAllocatorData, + OpcodeVariant, TapeCellVariant, +}; +use crate::{ + macros::macros::{r_assert, r_panic}, + tokeniser::Token, +}; + +pub type TapeCell = i32; +impl TapeCellVariant for TapeCell { + fn origin_cell() -> TapeCell { + 0 + } + fn with_offset(&self, offset: i32) -> Self { + self + offset + } +} + +#[derive(Clone, Copy, Debug)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Opcode { + Add, + Subtract, + Right, + Left, + OpenLoop, + CloseLoop, + Output, + Input, + Clear, +} + +impl OpcodeVariant for Opcode { + fn from_token(token: &Token) -> Result { + Ok(match token { + Token::Plus => Opcode::Add, + Token::Minus => Opcode::Subtract, + Token::MoreThan => Opcode::Right, + Token::LessThan => Opcode::Left, + Token::OpenSquareBracket => Opcode::OpenLoop, + Token::ClosingSquareBracket => Opcode::CloseLoop, + Token::Dot => Opcode::Output, + Token::Comma => Opcode::Input, + _ => r_panic!("Invalid token in inline Brainfuck: {token:?}"), + }) + } +} + +impl CellAllocator for CellAllocatorData<'_, TapeCell> { + /// Check if the desired number of cells can be allocated to the right of a given location + fn check_allocatable(&mut self, location: &TapeCell, size: usize) -> bool { + for k in 0..size { + if self.cells.contains(&(location + k as i32)) { + return 
false; + } + } + return true; + } + + /// Allocate size number of cells and return the location, optionally specify a location + fn allocate(&mut self, location: Option, size: usize) -> Result { + if let Some(l) = location { + if !self.check_allocatable(&l, size) { + r_panic!("Location specifier @{l} conflicts with another allocation"); + } + } + + // find free space + let mut region_start = location.unwrap_or(0); + for i in region_start.. { + if self.cells.contains(&i) { + region_start = i + 1; + } else if i - region_start == (size as i32 - 1) { + break; + } + } + + for i in region_start..(region_start + size as i32) { + r_assert!( + self.cells.insert(i), + "Unreachable error detected in cell allocation: allocate({location:?}, {size:?})" + ); + } + + Ok(region_start) + } + + /// Allocate a cell as close as possible to the given cell, + /// used for optimisations which need extra cells for efficiency + fn allocate_temp_cell(&mut self, location: TapeCell) -> TapeCell { + // alternate left then right, getting further and further out + let mut left_iter = (0..=location).rev(); + let mut right_iter = (location + 1)..; + loop { + if let Some(i) = left_iter.next() { + // unallocated cell, allocate it and return + if self.cells.insert(i) { + return i; + } + } + + if let Some(i) = right_iter.next() { + if self.cells.insert(i) { + return i; + } + } + } + } + + fn free(&mut self, cell: TapeCell, size: usize) -> Result<(), String> { + for i in cell..(cell + size as i32) { + r_assert!( + self.cells.remove(&i), + "Cannot free cell @{i} as it is not allocated.", + ); + } + + Ok(()) + } +} + +impl BrainfuckProgram for Vec { + fn to_string(self) -> String { + let mut s = String::new(); + self.into_iter().for_each(|o| { + s.push_str(match o { + Opcode::Add => "+", + Opcode::Subtract => "-", + Opcode::Right => ">", + Opcode::Left => "<", + Opcode::OpenLoop => "[", + Opcode::CloseLoop => "]", + Opcode::Output => ".", + Opcode::Input => ",", + Opcode::Clear => "[-]", + }) + }); + s + 
} + + fn from_str(s: &str) -> Vec { + let mut ops = Vec::new(); + let mut i = 0; + while i < s.len() { + let substr = &s[i..]; + if substr.starts_with("[-]") { + ops.push(Opcode::Clear); + i += 3; + } else { + match substr.chars().next().unwrap() { + '+' => ops.push(Opcode::Add), + '-' => ops.push(Opcode::Subtract), + '>' => ops.push(Opcode::Right), + '<' => ops.push(Opcode::Left), + '[' => ops.push(Opcode::OpenLoop), + ']' => ops.push(Opcode::CloseLoop), + '.' => ops.push(Opcode::Output), + ',' => ops.push(Opcode::Input), + _ => (), // could put a little special opcode in for other characters + } + i += 1; + } + } + + ops + } +} + +impl BrainfuckProgram for BrainfuckBuilderData { + fn to_string(self) -> String { + self.opcodes.to_string() + } + + fn from_str(s: &str) -> BrainfuckBuilderData { + BrainfuckBuilderData { + opcodes: Vec::from_str(s), + head_pos: 0, + // head_pos: TapeCell(0), + } + } +} + +impl BrainfuckBuilder for BrainfuckBuilderData { + fn new() -> BrainfuckBuilderData { + BrainfuckBuilderData { + opcodes: Vec::new(), + head_pos: 0, + } + } + fn len(&self) -> usize { + self.opcodes.len() + } + fn push(&mut self, op: Opcode) { + self.opcodes.push(op); + } + fn extend(&mut self, ops: T) + where + T: IntoIterator, + { + self.opcodes.extend(ops); + } + fn move_to_cell(&mut self, cell: TapeCell) { + let x = cell; + let x_pos = self.head_pos; + //Move x level + if x_pos < x { + for _ in x_pos..x { + self.opcodes.push(Opcode::Right); + } + } else if x < x_pos { + // theoretically equivalent to cell..head_pos? 
+ for _ in ((x + 1)..=x_pos).rev() { + self.opcodes.push(Opcode::Left); + } + } + + self.head_pos = cell; + } + + fn add_to_current_cell(&mut self, imm: i8) { + if imm > 0 { + for _ in 0..imm { + self.opcodes.push(Opcode::Add); + } + } else if imm < 0 { + // needs to be i32 because -(-128) = -128 in i8-land + for _ in 0..-(imm as i32) { + self.opcodes.push(Opcode::Subtract); + } + } + } + + fn clear_current_cell(&mut self) { + self.opcodes.push(Opcode::OpenLoop); + self.opcodes.push(Opcode::Subtract); + self.opcodes.push(Opcode::CloseLoop); + } + fn output_current_cell(&mut self) { + self.opcodes.push(Opcode::Output); + } + fn input_to_current_cell(&mut self) { + self.opcodes.push(Opcode::Input); + } + fn open_loop(&mut self) { + self.opcodes.push(Opcode::OpenLoop); + } + fn close_loop(&mut self) { + self.opcodes.push(Opcode::CloseLoop); + } +} diff --git a/compiler/src/backend/bf2d.rs b/compiler/src/backend/bf2d.rs new file mode 100644 index 0000000..432f2c6 --- /dev/null +++ b/compiler/src/backend/bf2d.rs @@ -0,0 +1,401 @@ +use super::common::{ + BrainfuckBuilder, BrainfuckBuilderData, BrainfuckProgram, CellAllocator, CellAllocatorData, + OpcodeVariant, TapeCellVariant, +}; +use crate::{ + macros::macros::{r_assert, r_panic}, + tokeniser::Token, +}; + +use std::{fmt::Display, hash::Hash}; + +#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy)] +pub struct TapeCell2D(pub i32, pub i32); +impl TapeCellVariant for TapeCell2D { + fn origin_cell() -> TapeCell2D { + TapeCell2D(0, 0) + } + fn with_offset(&self, offset: i32) -> Self { + TapeCell2D(self.0 + offset, self.1) + } +} + +#[derive(Clone, Copy, Debug)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Opcode2D { + Add, + Subtract, + Right, + Left, + OpenLoop, + CloseLoop, + Output, + Input, + Clear, + Up, + Down, +} + +impl OpcodeVariant for Opcode2D { + fn from_token(token: &Token) -> Result { + Ok(match token { + Token::Plus => Opcode2D::Add, + Token::Minus => Opcode2D::Subtract, + Token::MoreThan => 
Opcode2D::Right, + Token::LessThan => Opcode2D::Left, + Token::OpenSquareBracket => Opcode2D::OpenLoop, + Token::ClosingSquareBracket => Opcode2D::CloseLoop, + Token::Dot => Opcode2D::Output, + Token::Comma => Opcode2D::Input, + Token::Caret => Opcode2D::Up, + // TODO: implement this: + // Token::Down => Opcode2D::Down, + _ => r_panic!("Invalid token in inline Brainfuck: {token:?}"), + }) + } +} + +impl BrainfuckProgram for Vec { + fn to_string(self) -> String { + let mut s = String::new(); + self.into_iter().for_each(|o| { + s.push_str(match o { + Opcode2D::Add => "+", + Opcode2D::Subtract => "-", + Opcode2D::Right => ">", + Opcode2D::Left => "<", + Opcode2D::OpenLoop => "[", + Opcode2D::CloseLoop => "]", + Opcode2D::Output => ".", + Opcode2D::Input => ",", + Opcode2D::Clear => "[-]", + Opcode2D::Up => "^", + Opcode2D::Down => "v", + }) + }); + s + } + + fn from_str(s: &str) -> Vec { + let mut ops = Vec::new(); + let mut i = 0; + while i < s.len() { + let substr = &s[i..]; + if substr.starts_with("[-]") { + ops.push(Opcode2D::Clear); + i += 3; + } else { + match substr.chars().next().unwrap() { + '+' => ops.push(Opcode2D::Add), + '-' => ops.push(Opcode2D::Subtract), + '>' => ops.push(Opcode2D::Right), + '<' => ops.push(Opcode2D::Left), + '[' => ops.push(Opcode2D::OpenLoop), + ']' => ops.push(Opcode2D::CloseLoop), + '.' 
=> ops.push(Opcode2D::Output), + ',' => ops.push(Opcode2D::Input), + '^' => ops.push(Opcode2D::Up), + 'v' => ops.push(Opcode2D::Down), + _ => (), // could put a little special opcode in for other characters + } + i += 1; + } + } + + ops + } +} + +impl Display for TapeCell2D { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("({}, {})", self.0, self.1))?; + Ok(()) + } +} + +impl CellAllocator for CellAllocatorData<'_, TapeCell2D> { + /// Check if the desired number of cells can be allocated to the right of a given location + fn check_allocatable(&mut self, location: &TapeCell2D, size: usize) -> bool { + for k in 0..size { + if self + .cells + .contains(&TapeCell2D(location.0 + k as i32, location.1)) + { + return false; + } + } + return true; + } + + /// Will either check a specific location can be allocated at the chosen size or if no location is + /// provided it will find a memory location where this size can be allocated + /// Uses a variety of memory allocation methods based on settings + fn allocate( + &mut self, + location: Option, + size: usize, + ) -> Result { + let mut region_start = location.unwrap_or(TapeCell2D(0, 0)); + //Check specified memory allocation above to ensure that this works nicely with all algorithms + if let Some(l) = location { + if !self.check_allocatable(&l, size) { + r_panic!("Location specifier @{l} conflicts with another allocation"); + } + } else { + // should the region start at the current tape head? + if self.config.memory_allocation_method == 0 { + for i in region_start.0.. 
{ + if self.cells.contains(&TapeCell2D(i, region_start.1)) { + region_start = TapeCell2D(i + 1, region_start.1); + } else if i - region_start.0 == (size as i32 - 1) { + break; + } + } + } else if self.config.memory_allocation_method == 1 { + //Zig Zag + let mut found = false; + let mut loops = 0; + let mut i; + let mut j; + while !found { + i = region_start.0 + loops; + j = region_start.1; + for _ in 0..=loops { + if self.check_allocatable(&TapeCell2D(i, j), size) { + found = true; + region_start = TapeCell2D(i, j); + break; + } + i = i - 1; + j = j + 1; + } + loops += 1; + } + } else if self.config.memory_allocation_method == 2 { + //Spiral + let mut found = false; + let mut loops = 1; + let directions = ['N', 'E', 'S', 'W']; + let mut i = region_start.0; + let mut j = region_start.1; + while !found { + for dir in directions { + match dir { + 'N' => { + for _ in 0..loops { + j += 1; + if self.check_allocatable(&TapeCell2D(i, j), size) { + found = true; + region_start = TapeCell2D(i, j); + break; + } + } + } + 'E' => { + for _ in 0..loops { + i += 1; + if self.check_allocatable(&TapeCell2D(i, j), size) { + found = true; + region_start = TapeCell2D(i, j); + break; + } + } + } + 'S' => { + for _ in 0..loops { + j -= 1; + if self.check_allocatable(&TapeCell2D(i, j), size) { + found = true; + region_start = TapeCell2D(i, j); + break; + } + } + } + 'W' => { + for _ in 0..loops { + i -= 1; + if self.check_allocatable(&TapeCell2D(i, j), size) { + found = true; + region_start = TapeCell2D(i, j); + break; + } + } + } + _ => {} + } + if found { + break; + } + } + if found { + break; + } + i -= 1; + j -= 1; + loops += 2; + } + } else if self.config.memory_allocation_method == 3 { + //Tiles + let mut found = false; + let mut loops = 0; + while !found { + for i in -loops..=loops { + for j in -loops..=loops { + if self.check_allocatable( + &TapeCell2D(region_start.0 + i, region_start.1 + j), + size, + ) { + found = true; + region_start = TapeCell2D(region_start.0 + i, 
region_start.1 + j); + break; + } + } + if found { + break; + } + } + loops += 1; + } + } else { + panic!("Memory Allocation Method not implemented"); + } + } + + // make all cells in the specified region allocated + for i in region_start.0..(region_start.0 + size as i32) { + if !self.cells.contains(&TapeCell2D(i, region_start.1)) { + self.cells.insert(TapeCell2D(i, region_start.1)); + } + } + + Ok(region_start) + } + + /// Allocate a cell as close as possible to the given cell, + /// used for optimisations which need extra cells for efficiency + fn allocate_temp_cell(&mut self, location: TapeCell2D) -> TapeCell2D { + // alternate left then right, getting further and further out + let mut left_iter = (0..=location.0).rev(); + let mut right_iter = (location.0 + 1)..; + loop { + if let Some(i) = left_iter.next() { + // unallocated cell, allocate it and return + if self.cells.insert(TapeCell2D(i, location.1)) { + return TapeCell2D(i, location.1); + } + } + + if let Some(i) = right_iter.next() { + if self.cells.insert(TapeCell2D(i, location.1)) { + return TapeCell2D(i, location.1); + } + } + } + } + + fn free(&mut self, cell: TapeCell2D, size: usize) -> Result<(), String> { + for i in cell.0..(cell.0 + size as i32) { + let c = TapeCell2D(i, cell.1); + r_assert!( + self.cells.remove(&c), + "Cannot free cell @{c} as it is not allocated." 
+ ); + } + + Ok(()) + } +} + +impl BrainfuckProgram for BrainfuckBuilderData { + fn to_string(self) -> String { + self.opcodes.to_string() + } + + fn from_str(s: &str) -> BrainfuckBuilderData { + BrainfuckBuilderData { + opcodes: Vec::from_str(s), + head_pos: TapeCell2D(0, 0), + } + } +} + +impl BrainfuckBuilder for BrainfuckBuilderData { + fn new() -> BrainfuckBuilderData { + BrainfuckBuilderData { + opcodes: Vec::new(), + head_pos: TapeCell2D(0, 0), + } + } + fn len(&self) -> usize { + self.opcodes.len() + } + fn push(&mut self, op: Opcode2D) { + self.opcodes.push(op); + } + fn extend(&mut self, ops: T) + where + T: IntoIterator, + { + self.opcodes.extend(ops); + } + fn move_to_cell(&mut self, cell: TapeCell2D) { + let x = cell.0; + let y = cell.1; + let x_pos = self.head_pos.0; + let y_pos = self.head_pos.1; + //Move x level + if x_pos < x { + for _ in x_pos..x { + self.opcodes.push(Opcode2D::Right); + } + } else if x < x_pos { + // theoretically equivalent to cell..head_pos? + for _ in ((x + 1)..=x_pos).rev() { + self.opcodes.push(Opcode2D::Left); + } + } + //Move y level + if y_pos < y { + for _ in y_pos..y { + self.opcodes.push(Opcode2D::Up); + } + } else if y < y_pos { + // theoretically equivalent to cell..head_pos? 
+ for _ in ((y + 1)..=y_pos).rev() { + self.opcodes.push(Opcode2D::Down); + } + } + self.head_pos = cell; + } + + fn add_to_current_cell(&mut self, imm: i8) { + if imm > 0 { + for _ in 0..imm { + self.opcodes.push(Opcode2D::Add); + } + } else if imm < 0 { + // needs to be i32 because -(-128) = -128 in i8-land + for _ in 0..-(imm as i32) { + self.opcodes.push(Opcode2D::Subtract); + } + } + } + + fn clear_current_cell(&mut self) { + self.opcodes.push(Opcode2D::OpenLoop); + self.opcodes.push(Opcode2D::Subtract); + self.opcodes.push(Opcode2D::CloseLoop); + } + fn output_current_cell(&mut self) { + self.opcodes.push(Opcode2D::Output); + } + fn input_to_current_cell(&mut self) { + self.opcodes.push(Opcode2D::Input); + } + fn open_loop(&mut self) { + self.opcodes.push(Opcode2D::OpenLoop); + } + fn close_loop(&mut self) { + self.opcodes.push(Opcode2D::CloseLoop); + } +} diff --git a/compiler/src/backend/common.rs b/compiler/src/backend/common.rs new file mode 100644 index 0000000..a99aa02 --- /dev/null +++ b/compiler/src/backend/common.rs @@ -0,0 +1,469 @@ +use crate::{ + constants_optimiser::calculate_optimal_addition, + frontend::{CellLocation, Instruction, MemoryId}, + macros::macros::{r_assert, r_panic}, + misc::{MastermindConfig, MastermindContext}, + parser::TapeCellLocation, + tokeniser::Token, +}; + +use std::{ + collections::{HashMap, HashSet}, + num::Wrapping, +}; + +type LoopDepth = usize; +type TapeValue = u8; + +impl MastermindContext { + pub fn ir_to_bf<'a, TC: TapeCellVariant, OC: OpcodeVariant>( + &'a self, + instructions: Vec>, + return_to_cell: Option, + ) -> Result, String> + where + BrainfuckBuilderData: BrainfuckBuilder, + CellAllocatorData<'a, TC>: CellAllocator, + { + let mut allocator = CellAllocatorData::new(&self.config); + + struct AllocationMapEntry { + cell_base: TC, + size: usize, + alloc_loop_depth: LoopDepth, + known_values: Vec>, + } + let mut alloc_map: HashMap> = HashMap::new(); + + let mut loop_stack: Vec = Vec::new(); + let mut 
current_loop_depth: LoopDepth = 0; + let mut skipped_loop_depth: Option = None; + let mut ops = BrainfuckBuilderData::new(); + + for instruction in instructions { + if let Some(depth) = skipped_loop_depth { + // current loop is being skipped because of unreachable loop optimisations + match instruction { + Instruction::OpenLoop(_) => { + current_loop_depth += 1; + } + Instruction::CloseLoop(_) => { + current_loop_depth -= 1; + if current_loop_depth == depth { + skipped_loop_depth = None; + } + } + _ => (), + } + continue; + } + match instruction { + // the ids (indices really) given by the compiler are guaranteed to be unique (at the time of writing) + // however they will absolutely not be very efficient if used directly as cell locations + Instruction::Allocate(memory, location_specifier) => { + let cell = allocator.allocate(location_specifier, memory.len())?; + let None = alloc_map.insert( + memory.id(), + AllocationMapEntry { + cell_base: cell, + size: memory.len(), + alloc_loop_depth: current_loop_depth, + known_values: vec![Some(0); memory.len()], + }, + ) else { + r_panic!("Attempted to reallocate memory {memory:#?}"); + }; + } + Instruction::AssertCellValue(cell_obj, imm) => { + let Some(AllocationMapEntry { + cell_base: _, + size, + alloc_loop_depth, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!( + "Attempted to assert value of cell {cell_obj:#?} \ +which could not be found" + ); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let known_value = &mut known_values[mem_idx]; + + // allow the user to assert that we don't know the value of the cell by clobbering when we do inline brainfuck + if *alloc_loop_depth == current_loop_depth || imm.is_none() { + *known_value = imm; + } else { + r_panic!( + "Cannot assert cell {cell_obj:#?} value \ +outside of loop it was allocated" + ); + } + } + Instruction::Free(id) => { + // TODO: do I 
need to check alloc loop depth here? Or are cells never freed in an inner scope? + // think about this in regards to reusing cell space when a cell isn't being used + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth: _, + known_values, + }) = alloc_map.remove(&id) + else { + r_panic!("Attempted to free memory id {id} which could not be found"); + }; + + let None = known_values + .into_iter() + .find_map(|known_value| (known_value.unwrap_or(1) != 0).then_some(())) + else { + r_panic!( + "Attempted to free memory id {id} which has unknown or non-zero values" + ); + }; + + allocator.free(cell_base, size)?; + } + Instruction::OpenLoop(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!( + "Attempted to open loop at cell {cell_obj:#?} which could not be found" + ); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + let known_value = &mut known_values[mem_idx]; + + let mut open = true; + + if let Some(known_value) = known_value { + if *alloc_loop_depth == current_loop_depth + && *known_value == 0 && self.config.optimise_unreachable_loops + { + open = false; + skipped_loop_depth = Some(current_loop_depth); + current_loop_depth += 1; + } + } + + // skip the loop if the optimisations are turned on and we know the value is 0 + if open { + ops.move_to_cell(cell); + ops.open_loop(); + loop_stack.push(cell); + current_loop_depth += 1; + } + } + Instruction::CloseLoop(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!( + "Attempted to close loop at cell {cell_obj:#?} which could not be found" + ); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + 
"Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + let known_value = &mut known_values[mem_idx]; + + let Some(stack_cell) = loop_stack.pop() else { + r_panic!("Attempted to close un-opened loop"); + }; + r_assert!(cell == stack_cell, "Attempted to close a loop unbalanced"); + + current_loop_depth -= 1; + + ops.move_to_cell(cell); + ops.close_loop(); + + // if a loop finishes on a cell then it is guaranteed to be 0 based on brainfuck itself + // I did encounter issues with nested loops here, interesting + if current_loop_depth == *alloc_loop_depth { + *known_value = Some(0); + } + } + Instruction::AddToCell(cell_obj, imm) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!("Attempted to add to cell {cell_obj:#?} which could not be found"); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + let known_value = &mut known_values[mem_idx]; + + // TODO: fix bug, if only one multiplication then we can have a value already in the cell, but never otherwise + + // not sure if these optimisations should be in the builder step or in the compiler + // if self.config.optimise_constants { + // // ops.move_to_cell(&mut head_pos, cell); + // // here we use an algorithm that finds the best combo of products and constants to make the number to minimise bf code + // // first we get the closest allocated cell so we can calculate the distance cost of multiplying + // // TODO: instead find the nearest zero cell, doesn't matter if allocated or not + // let temp_cell = allocator.allocate_temp_cell(cell); + + // let optimised_ops = + // calculate_optimal_addition(imm as i8, ops.head_pos, cell, temp_cell); + + // ops.head_pos = optimised_ops.head_pos; + // 
ops.extend(optimised_ops.opcodes); + + // allocator.free(temp_cell, 1)?; + // } else { + // ops.move_to_cell(cell); + // ops.add_to_current_cell(imm as i8); + // } + + // TODO: fix optimisations + ops.move_to_cell(cell); + ops.add_to_current_cell(imm as i8); + + if imm != 0 { + if *alloc_loop_depth != current_loop_depth { + *known_value = None; + } else if let Some(known_value) = known_value { + *known_value = (Wrapping(*known_value) + Wrapping(imm)).0; + } + } + } + Instruction::InputToCell(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth: _, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!( + "Attempted to input to cell {cell_obj:#?} which could not be found" + ); + }; + + // TODO: refactor this duplicate code (get_cell_safe or something like that) + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + let known_value = &mut known_values[mem_idx]; + + ops.move_to_cell(cell); + ops.input_to_current_cell(); + // no way to know at compile time what the input to the program will be + *known_value = None; + } + // Instruction::AssertCellValue(id, value) => {} + Instruction::ClearCell(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth, + known_values, + }) = alloc_map.get_mut(&cell_obj.memory_id) + else { + r_panic!("Attempted to clear cell {cell_obj:#?} which could not be found"); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + let known_value = &mut known_values[mem_idx]; + + ops.move_to_cell(cell); + + let mut clear = true; + + if let Some(known_value) = known_value { + if self.config.optimise_cell_clearing + && *alloc_loop_depth == current_loop_depth + // not sure if this should be 4 
or 3, essentially it depends on if we prefer clears or changes [-] vs ++--- + && (*known_value as i8).abs() < 4 + { + // let imm = *known_value as i8; + // if imm > 0 { + // for _ in 0..imm { + // ops.push(Opcode2D::Subtract); + // } + // } else if imm < 0 { + // for _ in 0..-imm { + // ops.push(Opcode2D::Add); + // } + // } + ops.add_to_current_cell(-(*known_value as i8)); + clear = false; + } + } + + if clear { + ops.clear_current_cell(); + } + + if *alloc_loop_depth == current_loop_depth { + *known_value = Some(0); + } else { + // TODO: fix this for if statements + *known_value = None; + } + } + Instruction::OutputCell(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth: _, + known_values: _, + }) = alloc_map.get(&cell_obj.memory_id) + else { + r_panic!("Attempted to output cell {cell_obj:#?} which could not be found"); + }; + + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + + ops.move_to_cell(cell); + ops.output_current_cell(); + } + Instruction::InsertBrainfuckAtCell(operations, location_specifier) => { + // move to the correct cell, based on the location specifier + match location_specifier { + CellLocation::FixedCell(cell) => ops.move_to_cell(cell.into()), + CellLocation::MemoryCell(cell_obj) => { + let Some(AllocationMapEntry { + cell_base, + size, + alloc_loop_depth: _, + known_values: _, + }) = alloc_map.get(&cell_obj.memory_id) + else { + r_panic!("Attempted to use location of cell {cell_obj:#?} which could not be found"); + }; + let mem_idx = cell_obj.index.unwrap_or(0); + r_assert!( + mem_idx < *size, + "Attempted to access memory outside of allocation" + ); + let cell = cell_base.with_offset(mem_idx as i32); + ops.move_to_cell(cell); + } + CellLocation::Unspecified => (), + } + + // paste the in-line BF operations + ops.extend(operations); + } + } + } + + // this is used in 
embedded brainfuck contexts to preserve head position + if let Some(origin_cell) = return_to_cell { + ops.move_to_cell(origin_cell.into()); + } + + Ok(ops.opcodes) + } +} + +/// This trait must be implemented for a cell location type for a Brainfuck variant +/// for now this is implemented by TapeCell (i32 1D location specifier), and TapeCell2D (2D) +pub trait TapeCellVariant +where + Self: PartialEq + Copy + Clone + Eq + TapeCellLocation, +{ + fn origin_cell() -> Self; + fn with_offset(&self, offset: i32) -> Self; +} + +/// This trait must be implemented for a Brainfuck variant +pub trait OpcodeVariant +where + Self: Sized + Clone + Copy, +{ + fn from_token(token: &Token) -> Result; +} + +pub struct CellAllocatorData<'a, TC> { + pub cells: HashSet, + pub config: &'a MastermindConfig, +} +impl CellAllocatorData<'_, T> { + fn new(config: &MastermindConfig) -> CellAllocatorData { + CellAllocatorData { + cells: HashSet::new(), + config, + } + } +} + +pub trait CellAllocator { + fn check_allocatable(&mut self, location: &TC, size: usize) -> bool; + fn allocate(&mut self, location: Option, size: usize) -> Result; + fn allocate_temp_cell(&mut self, location: TC) -> TC; + fn free(&mut self, cell: TC, size: usize) -> Result<(), String>; +} + +pub struct BrainfuckBuilderData { + pub opcodes: Vec, + pub head_pos: TC, +} + +pub trait BrainfuckBuilder { + fn new() -> Self; + fn len(&self) -> usize; + fn push(&mut self, op: OC); + fn extend(&mut self, ops: T) + where + T: IntoIterator; + fn move_to_cell(&mut self, cell: TC); + fn add_to_current_cell(&mut self, imm: i8); + fn clear_current_cell(&mut self); + fn output_current_cell(&mut self); + fn input_to_current_cell(&mut self); + fn open_loop(&mut self); + fn close_loop(&mut self); +} + +pub trait BrainfuckProgram { + fn to_string(self) -> String; + fn from_str(s: &str) -> Self; +} diff --git a/compiler/src/backend/mod.rs b/compiler/src/backend/mod.rs new file mode 100644 index 0000000..8683037 --- /dev/null +++ 
b/compiler/src/backend/mod.rs @@ -0,0 +1,4 @@ +pub mod common; + +pub mod bf; +pub mod bf2d; diff --git a/compiler/src/brainfuck.rs b/compiler/src/brainfuck.rs index 977756e..27f9c82 100644 --- a/compiler/src/brainfuck.rs +++ b/compiler/src/brainfuck.rs @@ -8,7 +8,7 @@ use std::{ }; use crate::{ - cells::{TapeCell2D, TapeCellVariant}, + backend::{bf2d::TapeCell2D, common::TapeCellVariant}, macros::macros::r_panic, }; use wasm_bindgen::{JsCast, JsValue}; diff --git a/compiler/src/brainfuck_optimiser.rs b/compiler/src/brainfuck_optimiser.rs index a8171b8..a02e62d 100644 --- a/compiler/src/brainfuck_optimiser.rs +++ b/compiler/src/brainfuck_optimiser.rs @@ -1,7 +1,10 @@ use itertools::Itertools; use std::{collections::HashMap, num::Wrapping}; -use crate::{backend::Opcode2D, cells::TapeCell2D, misc::MastermindContext}; +use crate::{ + backend::bf2d::{Opcode2D, TapeCell2D}, + misc::MastermindContext, +}; // originally trivial post-compilation brainfuck optimisations // extended to 2D which makes it more difficult @@ -209,7 +212,7 @@ fn _move_position( #[cfg(test)] mod bf_optimiser_tests { use crate::{ - backend::BrainfuckOpcodes, + backend::common::BrainfuckProgram, misc::{MastermindConfig, MastermindContext}, }; @@ -245,21 +248,21 @@ mod bf_optimiser_tests { #[test] fn greedy_subset_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("+++>><<++>--->+++<><><><><<<<<+++"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++>><<++>--->+++<><><><><<<<<+++"); //(3) 0 0 [5] -3 3 let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "+++++>--->+++<<<<<+++"); } #[test] fn greedy_program_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("<><><>++<+[--++>>+<<-]"); + let v = BrainfuckProgram::from_str("<><><>++<+[--++>>+<<-]"); let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "++<+[->>+<<]"); } #[test] fn greedy_program_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( 
"+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 let o: String = CTX_OPT.optimise_bf_code(v).to_string(); @@ -268,28 +271,28 @@ mod bf_optimiser_tests { #[test] fn greedy_program_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str(">><."); + let v = BrainfuckProgram::from_str(">><."); let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, ">."); } #[test] fn greedy_subset_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++<+++>[-]+++"); //(3) 0 0 [5] -3 3 let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++<+++>"); } #[test] fn greedy_subset_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++[-]<[-]--+>-"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++<+++>[-]+++[-]<[-]--+>-"); //(3) 0 0 [5] -3 3 let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]-<[-]->"); } #[test] fn greedy_program_equivalence_test_3() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++[-]+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 let o: String = CTX_OPT.optimise_bf_code(v).to_string(); @@ -298,21 +301,21 @@ mod bf_optimiser_tests { #[test] fn greedy_two_dimensional_subset_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("+++^^vv++^---^+++v^v^v^v^vvvvv+++"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++^^vv++^---^+++v^v^v^v^vvvvv+++"); //(3) 0 0 [5] -3 3 let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "+++++^---^+++vvvvv+++"); } #[test] fn greedy_two_dimensional_program_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("v^v^v^++v+[--++^^+vv-]"); + let v = BrainfuckProgram::from_str("v^v^v^++v+[--++^^+vv-]"); let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "++v+[-^^+vv]"); } #[test] fn 
greedy_two_dimensional_program_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++++++^^+++^----^^^++++--v--++vvhellov++++[-v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 let o: String = CTX_OPT.optimise_bf_code(v).to_string(); @@ -321,28 +324,28 @@ mod bf_optimiser_tests { #[test] fn greedy_two_dimensional_program_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("^^v."); + let v = BrainfuckProgram::from_str("^^v."); let o: String = CTX_OPT.optimise_bf_code(v).to_string(); assert_eq!(o, "^."); } #[test] fn greedy_two_dimensional_subset_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++v+++^[-]+++"); //(3) 0 0 [5] -3 3 let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++v+++^"); } #[test] fn greedy_two_dimensional_subset_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++[-]v[-]--+^-"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++v+++^[-]+++[-]v[-]--+^-"); //(3) 0 0 [5] -3 3 let o = CTX_OPT.optimise_subset(v).to_string(); assert_eq!(o, "[-]-v[-]-^"); } #[test] fn greedy_two_dimensional_program_equivalence_test_3() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++[-]+++++++++^^+++^----^^^++++--v--++vvhellov++++[[-]v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 let o: String = CTX_OPT.optimise_bf_code(v).to_string(); @@ -352,7 +355,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_subset_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("+++>><<++>--->+++<><><><><<<<<+++"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++>><<++>--->+++<><><><><<<<<+++"); //(3) 0 0 [5] -3 3 let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, ">--->+++<<+++++<<<+++"); } @@ -360,7 +363,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn 
exhaustive_program_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("<><><>++<+[--++>>+<<-]"); + let v = BrainfuckProgram::from_str("<><><>++<+[--++>>+<<-]"); let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "++<+[>>+<<-]"); } @@ -368,7 +371,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_program_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); @@ -378,7 +381,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_program_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str(">><."); + let v = BrainfuckProgram::from_str(">><."); let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, ">."); } @@ -386,7 +389,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_subset_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++<+++>[-]+++"); //(3) 0 0 [5] -3 3 let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++<+++>"); } @@ -394,7 +397,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_subset_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("+++<+++>[-]+++[-]<[-]--+>-"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++<+++>[-]+++[-]<[-]--+>-"); //(3) 0 0 [5] -3 3 let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]-<[-]->"); } @@ -402,7 +405,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_program_equivalence_test_3() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++[-]+++++++++>>+++>---->>>++++--<--++<>++<+<->]++--->+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); @@ -412,7 
+415,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_two_dimensional_subset_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("+++^^vv++^---^+++v^v^v^v^vvvvv+++"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++^^vv++^---^+++v^v^v^v^vvvvv+++"); //(3) 0 0 [5] -3 3 let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "^^+++v---v+++++vvv+++"); } @@ -420,7 +423,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_two_dimensional_program_equivalence_test_0() { - let v = BrainfuckOpcodes::from_str("v^v^v^++v+[--++^^+vv-]"); + let v = BrainfuckProgram::from_str("v^v^v^++v+[--++^^+vv-]"); let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "++v+[^^+vv-]"); } @@ -428,7 +431,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_two_dimensional_program_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++++++^^+++^----^^^++++--v--++vvhellov++++[-v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); @@ -438,7 +441,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_two_dimensional_program_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("^^v."); + let v = BrainfuckProgram::from_str("^^v."); let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); assert_eq!(o, "^."); } @@ -446,7 +449,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_two_dimensional_subset_equivalence_test_1() { - let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++"); //(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++v+++^[-]+++"); //(3) 0 0 [5] -3 3 let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]+++v+++^"); } @@ -454,7 +457,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_two_dimensional_subset_equivalence_test_2() { - let v = BrainfuckOpcodes::from_str("+++v+++^[-]+++[-]v[-]--+^-"); 
//(3) 0 0 [5] -3 3 + let v = BrainfuckProgram::from_str("+++v+++^[-]+++[-]v[-]--+^-"); //(3) 0 0 [5] -3 3 let o = CTX_OPT_EXHAUSTIVE.optimise_subset(v).to_string(); assert_eq!(o, "[-]-v[-]-^"); } @@ -462,7 +465,7 @@ mod bf_optimiser_tests { #[test] #[ignore] fn exhaustive_two_dimensional_program_equivalence_test_3() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+++++[-]+++++++++^^+++^----^^^++++--v--++vvhellov++++[[-]v+^^++v+v-^]++---^+", ); // [9] 0 (7) -4 0 0 2 // [(0)] 2 // -1 1 let o: String = CTX_OPT_EXHAUSTIVE.optimise_bf_code(v).to_string(); @@ -470,7 +473,7 @@ mod bf_optimiser_tests { } fn subset_edge_case_0() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "-++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++", ); let o: String = CTX_OPT.optimise_subset(v).to_string(); @@ -480,7 +483,7 @@ mod bf_optimiser_tests { #[test] fn subset_edge_case_1() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++", ); let o: String = CTX_OPT.optimise_subset(v).to_string(); @@ -490,7 +493,7 @@ mod bf_optimiser_tests { #[test] fn subset_edge_case_2() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "+--------------------------------------------------------------------------------------------------------------------------------" ); let o: String = CTX_OPT.optimise_subset(v).to_string(); @@ -500,7 +503,7 @@ mod bf_optimiser_tests { #[test] fn subset_edge_case_3() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "--------------------------------------------------------------------------------------------------------------------------------" ); let o: String = CTX_OPT.optimise_subset(v).to_string(); @@ -510,7 +513,7 @@ mod 
bf_optimiser_tests { #[test] fn subset_edge_case_3a() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "- --------------------------------------------------------------------------------------------------------------------------------" ); let o: String = CTX_OPT.optimise_subset(v).to_string(); @@ -520,7 +523,7 @@ mod bf_optimiser_tests { #[test] fn subset_edge_case_4() { - let v = BrainfuckOpcodes::from_str( + let v = BrainfuckProgram::from_str( "[-]--------------------------------------------------------------------------------------------------------------------------------" ); let o: String = CTX_OPT.optimise_subset(v).to_string(); diff --git a/compiler/src/cells.rs b/compiler/src/cells.rs deleted file mode 100644 index acb4bc3..0000000 --- a/compiler/src/cells.rs +++ /dev/null @@ -1,49 +0,0 @@ -use std::fmt::Display; - -use crate::parser::TapeCellLocation; - -/// when making Brainfuck variants, for a cell location type, you must implement this trait -/// for now this is implemented by TapeCell (1D location specifier), and TapeCell2D (2D) -pub trait TapeCellVariant -where - Self: PartialEq + Copy + Clone + Eq + TapeOrigin + TapeCellLocation, -{ -} - -#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy)] -pub struct TapeCell(pub i32); - -#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy)] -pub struct TapeCell2D(pub i32, pub i32); - -impl TapeCellVariant for TapeCell {} -impl TapeCellVariant for TapeCell2D {} - -impl Display for TapeCell { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("{}", self.0))?; - Ok(()) - } -} - -impl Display for TapeCell2D { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("({}, {})", self.0, self.1))?; - Ok(()) - } -} - -pub trait TapeOrigin { - fn origin_cell() -> Self; -} - -impl TapeOrigin for TapeCell { - fn origin_cell() -> TapeCell { - TapeCell(0) - } -} -impl TapeOrigin for TapeCell2D { - fn origin_cell() -> 
TapeCell2D { - TapeCell2D(0, 0) - } -} diff --git a/compiler/src/constants_optimiser.rs b/compiler/src/constants_optimiser.rs index 520ead7..dfa3505 100644 --- a/compiler/src/constants_optimiser.rs +++ b/compiler/src/constants_optimiser.rs @@ -1,7 +1,6 @@ -// TODO: make unit tests for this -use crate::{ - backend::{BFBuilder2D, Opcode2D}, - cells::TapeCell2D, +use crate::backend::{ + bf2d::{Opcode2D, TapeCell2D}, + common::{BrainfuckBuilder, BrainfuckBuilderData}, }; // basically, most ascii characters are large numbers, which are more efficient to calculate with multiplication than with a bunch of + or - @@ -11,19 +10,20 @@ use crate::{ // 7 * 4 : {>}(tricky)+++++++[<++++>-]< // 5 * 5 * 7 : +++++[>+++++<-]>[<+++++++>-]< +// TODO: make unit tests for this pub fn calculate_optimal_addition( value: i8, start_cell: TapeCell2D, target_cell: TapeCell2D, temp_cell: TapeCell2D, -) -> BFBuilder2D { +) -> BrainfuckBuilderData { // can't abs() i8 directly because there is no +128i8, so abs(-128i8) crashes let abs_value = (value as i32).abs(); // STAGE 0: // for efficiency's sake, calculate the cost of just adding the constant to the cell let naive_solution = { - let mut ops = BFBuilder2D::new(); + let mut ops = BrainfuckBuilderData::new(); ops.head_pos = start_cell; ops.move_to_cell(target_cell); ops.add_to_current_cell(value); @@ -74,7 +74,7 @@ pub fn calculate_optimal_addition( assert_eq!(best_combinations.len(), (abs_value as usize) + 1); let (a, b, c) = best_combinations.into_iter().last().unwrap(); - let mut ops = BFBuilder2D::new(); + let mut ops = BrainfuckBuilderData::new(); ops.head_pos = start_cell; ops.move_to_cell(temp_cell); diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index 785cb6d..be600e6 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -3,8 +3,10 @@ use std::{collections::HashMap, fmt::Display, iter::zip}; use crate::{ - backend::Opcode2D, - cells::{TapeCell, TapeCell2D, TapeCellVariant}, + backend::common::{ + 
BrainfuckBuilder, BrainfuckBuilderData, CellAllocator, CellAllocatorData, OpcodeVariant, + TapeCellVariant, + }, macros::macros::{r_assert, r_panic}, misc::MastermindContext, parser::{ @@ -13,19 +15,16 @@ use crate::{ }, }; -// TODO: remove the need for this Into: -impl Into for TapeCell { - fn into(self) -> TapeCell2D { - TapeCell2D(self.0, 0) - } -} - impl MastermindContext { - pub fn create_ir_scope<'a, TC: 'static + TapeCellVariant + Into>( + pub fn create_ir_scope<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( &self, - clauses: &[Clause], - outer_scope: Option<&'a ScopeBuilder>, - ) -> Result, String> { + clauses: &[Clause], + outer_scope: Option<&'a ScopeBuilder>, + ) -> Result, String> + where + BrainfuckBuilderData: BrainfuckBuilder, + CellAllocatorData<'a, TC>: CellAllocator, + { let mut scope = if let Some(outer) = outer_scope { outer.open_inner() } else { @@ -492,7 +491,7 @@ impl MastermindContext { operations, } => { // loop through the opcodes - let mut expanded_bf: Vec = Vec::new(); + let mut expanded_bf: Vec = Vec::new(); for op in operations { match op { ExtendedOpcode::Block(mm_clauses) => { @@ -605,7 +604,7 @@ impl MastermindContext { // this is subject to change #[derive(Debug, Clone)] -pub enum Instruction { +pub enum Instruction { Allocate(Memory, Option), Free(MemoryId), // the number indicates which cell in the allocation stack should be freed (cell 0, is the top of the stack, 1 is the second element, etc) OpenLoop(CellReference), // same with other numbers here, they indicate the cell in the allocation stack to use in the instruction @@ -615,7 +614,7 @@ pub enum Instruction { ClearCell(CellReference), // not sure if this should be here, seems common enough that it should be AssertCellValue(CellReference, Option), // allows the user to hand-tune optimisations further OutputCell(CellReference), - InsertBrainfuckAtCell(Vec, CellLocation), + InsertBrainfuckAtCell(Vec, CellLocation), } #[derive(Debug, Clone)] @@ -685,9 +684,9 @@ 
impl Memory { #[derive(Clone, Debug)] /// Scope type represents a Mastermind code block, /// any variables or functions defined within a {block} are owned by the scope and cleaned up before continuing -pub struct ScopeBuilder<'a, TapeCell> { +pub struct ScopeBuilder<'a, TC, OC> { /// a reference to the parent scope, for accessing things defined outside of this scope - outer_scope: Option<&'a ScopeBuilder<'a, TapeCell>>, + outer_scope: Option<&'a ScopeBuilder<'a, TC, OC>>, /// fn_only: true if syntactic context instead of normal context. /// Used for embedded mm so that the inner mm can use outer functions but not variables. types_only: bool, @@ -699,22 +698,18 @@ pub struct ScopeBuilder<'a, TapeCell> { variable_memory: HashMap, /// Functions accessible by any code within or in the current scope - functions: Vec<( - String, - Vec<(String, ValueType)>, - Vec>, - )>, + functions: Vec<(String, Vec<(String, ValueType)>, Vec>)>, /// Struct types definitions structs: HashMap, /// Intermediate instructions generated by the compiler - instructions: Vec>, + instructions: Vec>, } #[derive(Clone, Debug)] // probably shouldn't be cloning here but whatever -struct Function { +struct Function { arguments: Vec<(String, ValueType)>, - block: Vec>, + block: Vec>, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -848,11 +843,12 @@ impl ValueType { } } -impl ScopeBuilder<'_, TC> +impl ScopeBuilder<'_, TC, OC> where TC: Display + Clone, + OC: Clone, { - pub fn new() -> ScopeBuilder<'static, TC> { + pub fn new() -> ScopeBuilder<'static, TC, OC> { ScopeBuilder { outer_scope: None, types_only: false, @@ -866,7 +862,7 @@ where // I don't love this system of deciding what to clean up at the end in this specific function, but I'm not sure what the best way to achieve this would be // this used to be called "get_instructions" but I think this more implies things are being modified - pub fn build_ir(mut self, clean_up_variables: bool) -> Vec> { + pub fn build_ir(mut self, clean_up_variables: bool) 
-> Vec> { if !clean_up_variables { return self.instructions; } @@ -909,12 +905,12 @@ where self.instructions } - fn push_instruction(&mut self, instruction: Instruction) { + fn push_instruction(&mut self, instruction: Instruction) { self.instructions.push(instruction); } /// Open a scope within the current one, any time there is a {} in Mastermind, this is called - fn open_inner(&self) -> ScopeBuilder { + fn open_inner(&self) -> ScopeBuilder { ScopeBuilder { outer_scope: Some(self), types_only: false, @@ -928,7 +924,7 @@ where // syntactic context instead of normal context // used for embedded mm so that the inner mm can use outer functions - fn open_inner_templates_only(&self) -> ScopeBuilder { + fn open_inner_templates_only(&self) -> ScopeBuilder { ScopeBuilder { outer_scope: Some(self), types_only: true, @@ -1010,7 +1006,7 @@ where &self, calling_name: &str, calling_arg_types: &Vec<&ValueType>, - ) -> Result, String> { + ) -> Result, String> { // this function is unaffected by the self.fn_only flag Ok( if let Some(func) = self.functions.iter().find(|(name, args, _)| { @@ -1070,7 +1066,7 @@ where &mut self, new_function_name: &str, new_arguments: Vec>, - new_block: Vec>, + new_block: Vec>, ) -> Result<(), String> { let absolute_arguments = new_arguments .into_iter() diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index e6fadcb..8c83c5f 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -5,7 +5,6 @@ mod backend; mod brainfuck; mod brainfuck_optimiser; -mod cells; mod constants_optimiser; mod frontend; mod macros; @@ -15,9 +14,12 @@ mod preprocessor; mod tests; mod tokeniser; use crate::{ - backend::BrainfuckOpcodes, + backend::{ + bf::{Opcode, TapeCell}, + bf2d::{Opcode2D, TapeCell2D}, + common::BrainfuckProgram, + }, brainfuck::{BrainfuckConfig, BrainfuckContext}, - cells::{TapeCell, TapeCell2D}, misc::MastermindContext, parser::parse, preprocessor::preprocess_from_memory, @@ -53,20 +55,23 @@ pub fn wasm_compile( let preprocessed_file = 
preprocess_from_memory(&file_contents, entry_file_name)?; let tokens = tokenise(&preprocessed_file)?; - let bf_code = if ctx.config.enable_2d_grid { - let parsed_syntax = parse::(&tokens)?; + if ctx.config.enable_2d_grid { + let parsed_syntax = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); - ctx.ir_to_bf(instructions, None)? + let bf_code = ctx.ir_to_bf(instructions, None)?; + Ok(bf_code.to_string()) } else { - let parsed_syntax = parse::(&tokens)?; + let parsed_syntax = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); - ctx.ir_to_bf(instructions, None)? - }; + let bf_code = ctx.ir_to_bf(instructions, None)?; + Ok(bf_code.to_string()) + } - Ok(match ctx.config.optimise_generated_code { - true => ctx.optimise_bf_code(bf_code).to_string(), - false => bf_code.to_string(), - }) + // TODO: fix optimisations + // Ok(match ctx.config.optimise_generated_code { + // true => ctx.optimise_bf_code(bf_code).to_string(), + // false => bf_code.to_string(), + // }) } #[wasm_bindgen] diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 380ed17..3076cfe 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -5,7 +5,6 @@ mod backend; mod brainfuck; mod brainfuck_optimiser; -mod cells; mod constants_optimiser; mod frontend; mod macros; @@ -15,9 +14,7 @@ mod preprocessor; mod tests; mod tokeniser; use crate::{ - backend::BrainfuckOpcodes, brainfuck::{BrainfuckConfig, BrainfuckContext}, - cells::{TapeCell, TapeCell2D}, misc::{MastermindConfig, MastermindContext}, parser::parse, preprocessor::preprocess, @@ -99,11 +96,11 @@ fn main() -> Result<(), String> { // compile the provided file let tokens = tokenise(&program)?; let bf_code = if ctx.config.enable_2d_grid { - let parsed_syntax = parse::(&tokens)?; + let parsed_syntax = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); ctx.ir_to_bf(instructions, None)? 
} else { - let parsed_syntax = parse::(&tokens)?; + let parsed_syntax = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); ctx.ir_to_bf(instructions, None)? }; diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index 695125f..5871ef4 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -1,7 +1,6 @@ // project dependencies: use crate::{ - backend::Opcode2D, - cells::{TapeCell, TapeCell2D}, + backend::{bf::TapeCell, bf2d::TapeCell2D, common::OpcodeVariant}, macros::macros::{r_assert, r_panic}, tokeniser::Token, }; @@ -10,7 +9,9 @@ use crate::{ use std::{fmt::Display, mem::discriminant, num::Wrapping}; /// recursive function to create a tree representation of the program -pub fn parse(tokens: &[Token]) -> Result>, String> { +pub fn parse( + tokens: &[Token], +) -> Result>, String> { // basic steps: // chew off tokens from the front, recursively parse blocks of tokens let mut clauses = Vec::new(); @@ -114,9 +115,7 @@ pub fn parse(tokens: &[Token]) -> Result( - clause: &[Token], -) -> Result, String> { +fn parse_let_clause(clause: &[Token]) -> Result, String> { // cell x = 0; // struct DummyStruct y let mut i = 0usize; @@ -140,9 +139,7 @@ fn parse_let_clause( } /// Parse tokens representing a struct definition into a clause -fn parse_struct_clause( - clause: &[Token], -) -> Result, String> { +fn parse_struct_clause(clause: &[Token]) -> Result, String> { let mut i = 0usize; let Token::Struct = &clause[i] else { r_panic!("Expected struct keyword in struct clause. This should never occur. 
{clause:#?}"); @@ -190,7 +187,7 @@ fn parse_struct_clause( }) } -fn parse_add_clause(clause: &[Token]) -> Result>, String> { +fn parse_add_clause(clause: &[Token]) -> Result>, String> { let mut clauses = Vec::new(); let mut i = 0usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -225,7 +222,7 @@ fn parse_add_clause(clause: &[Token]) -> Result>, Str } // currently just syntax sugar, should make it actually do post/pre increments -fn parse_increment_clause(clause: &[Token]) -> Result, String> { +fn parse_increment_clause(clause: &[Token]) -> Result, String> { let (var, _) = parse_var_target(&clause[2..])?; //An increment clause can never be self referencing since it just VAR++ Ok(match (&clause[0], &clause[1]) { @@ -246,7 +243,7 @@ fn parse_increment_clause(clause: &[Token]) -> Result, St // assumed that the final token is a semicolon } -fn parse_set_clause(clause: &[Token]) -> Result>, String> { +fn parse_set_clause(clause: &[Token]) -> Result>, String> { // TODO: what do we do about arrays and strings and structs? 
let mut clauses = Vec::new(); let mut i = 0usize; @@ -295,10 +292,10 @@ fn parse_set_clause(clause: &[Token]) -> Result>, Str Ok(clauses) } -fn parse_drain_copy_clause( +fn parse_drain_copy_clause( clause: &[Token], is_draining: bool, -) -> Result, String> { +) -> Result, String> { // drain g {i += 1;}; // drain g into j; // copy foo into bar {g += 2; etc;}; @@ -361,9 +358,9 @@ fn parse_drain_copy_clause( }) } -fn parse_while_clause( +fn parse_while_clause( clause: &[Token], -) -> Result, String> { +) -> Result, String> { // TODO: make this able to accept expressions let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -379,9 +376,9 @@ fn parse_while_clause( }) } -fn parse_if_else_clause( +fn parse_if_else_clause( clause: &[Token], -) -> Result, String> { +) -> Result, String> { // skip first token, assumed to start with if let mut i = 1usize; let mut not = false; @@ -435,7 +432,7 @@ fn parse_if_else_clause( }) } -fn parse_output_clause(clause: &[Token]) -> Result, String> { +fn parse_output_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let expr_tokens = &clause[i..(clause.len() - 1)]; @@ -449,7 +446,7 @@ fn parse_output_clause(clause: &[Token]) -> Result, Strin Ok(Clause::OutputValue { value: expr }) } -fn parse_input_clause(clause: &[Token]) -> Result, String> { +fn parse_input_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -462,7 +459,7 @@ fn parse_input_clause(clause: &[Token]) -> Result, String Ok(Clause::InputVariable { var }) } -fn parse_assert_clause(clause: &[Token]) -> Result, String> { +fn parse_assert_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; let (var, len) = parse_var_target(&clause[i..])?; @@ -577,7 +574,7 @@ impl TapeCellLocation for TapeCell { Token::Minus | Token::Digits(_) => { let (offset, len) = parse_integer(&tokens[i..])?; i += len; - LocationSpecifier::Cell(TapeCell(offset)) + LocationSpecifier::Cell(offset) 
} Token::Name(_) => { // variable location specifier @@ -596,8 +593,8 @@ impl TapeCellLocation for TapeCell { } fn to_positive_cell_offset(&self) -> Result { - r_assert!(self.0 >= 0, "Expected non-negative cell offset."); - Ok(self.0 as usize) + r_assert!(*self >= 0, "Expected non-negative cell offset."); + Ok(*self as usize) } } @@ -649,9 +646,9 @@ impl TapeCellLocation for TapeCell2D { } } -fn parse_brainfuck_clause( +fn parse_brainfuck_clause( clause: &[Token], -) -> Result, String> { +) -> Result, String> { // bf {++--<><} // bf @3 {++--<><} // bf clobbers var1 var2 {++--<><} @@ -689,36 +686,18 @@ fn parse_brainfuck_clause( let mut j = 0; while j < bf_tokens.len() { match &bf_tokens[j] { - Token::Plus => ops.push(ExtendedOpcode::Opcode(Opcode2D::Add)), - Token::Minus => ops.push(ExtendedOpcode::Opcode(Opcode2D::Subtract)), - Token::MoreThan => ops.push(ExtendedOpcode::Opcode(Opcode2D::Right)), - Token::LessThan => ops.push(ExtendedOpcode::Opcode(Opcode2D::Left)), - Token::OpenSquareBracket => ops.push(ExtendedOpcode::Opcode(Opcode2D::OpenLoop)), - Token::ClosingSquareBracket => ops.push(ExtendedOpcode::Opcode(Opcode2D::CloseLoop)), - Token::Dot => ops.push(ExtendedOpcode::Opcode(Opcode2D::Output)), - Token::Comma => ops.push(ExtendedOpcode::Opcode(Opcode2D::Input)), - // TODO: refactor this: - Token::Caret => ops.push(ExtendedOpcode::Opcode(Opcode2D::Up)), - Token::Name(s) => { - for c in s.chars() { - if c == 'v' { - ops.push(ExtendedOpcode::Opcode(Opcode2D::Down)); - } else { - panic!("Invalid Inline Brainfuck Characters in {s}"); - } - } - } Token::OpenBrace => { // embedded mastermind let block_tokens = get_braced_tokens(&bf_tokens[j..], BRACES)?; let clauses = parse(block_tokens)?; ops.push(ExtendedOpcode::Block(clauses)); - j += block_tokens.len() + 1; + j += block_tokens.len() + 2; + } + token @ _ => { + ops.push(ExtendedOpcode::Opcode(OC::from_token(token)?)); + j += 1; } - // not sure whether to panic here or do nothing - _ => (), } - j += 1; } 
Ok(Clause::InlineBrainfuck { @@ -728,9 +707,9 @@ fn parse_brainfuck_clause( }) } -fn parse_function_definition_clause( +fn parse_function_definition_clause( clause: &[Token], -) -> Result, String> { +) -> Result, String> { let mut i = 1usize; // function name let Token::Name(name) = &clause[i] else { @@ -780,7 +759,7 @@ fn parse_function_definition_clause( }) } -fn parse_function_call_clause(clause: &[Token]) -> Result, String> { +fn parse_function_call_clause(clause: &[Token]) -> Result, String> { let mut i = 0usize; // Okay I didn't know this rust syntax, could have used it all over the place let Token::Name(name) = &clause[i] else { @@ -1567,13 +1546,13 @@ impl Display for VariableTarget { #[cfg(test)] mod parser_tests { - use crate::{backend::Opcode2D, cells::TapeCell}; + use crate::backend::{bf::TapeCell, bf2d::Opcode2D}; use super::*; #[test] fn parse_if_1() { - assert!(parse::(&[ + assert!(parse::(&[ // if true {{}} Token::If, Token::True, @@ -1593,24 +1572,26 @@ mod parser_tests { #[test] fn end_tokens_1() { - let _ = parse::(&[Token::Clobbers]).expect_err(""); + let _ = parse::(&[Token::Clobbers]).expect_err(""); } #[test] fn end_tokens_2() { - let _ = parse::(&[Token::Semicolon]).unwrap(); - let _ = parse::(&[Token::Semicolon, Token::Semicolon]).unwrap(); - let _ = parse::(&[Token::Semicolon, Token::Semicolon, Token::Semicolon]).unwrap(); + let _ = parse::(&[Token::Semicolon]).unwrap(); + let _ = parse::(&[Token::Semicolon, Token::Semicolon]).unwrap(); + let _ = + parse::(&[Token::Semicolon, Token::Semicolon, Token::Semicolon]) + .unwrap(); } #[test] fn end_tokens_3() { - let _ = parse::(&[Token::Cell, Token::Semicolon]).expect_err(""); + let _ = parse::(&[Token::Cell, Token::Semicolon]).expect_err(""); } #[test] fn while_condition_1() { - assert!(parse::(&[ + assert!(parse::(&[ Token::While, Token::Name(String::from("x")), Token::OpenBrace, @@ -1630,27 +1611,9 @@ mod parser_tests { }])) } - #[test] - fn var_v() { - assert!(parse::(&[ - Token::Cell, - 
Token::Name(String::from("v")), - Token::Semicolon - ]) - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { - var: VariableDefinition { - name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }])) - } - #[test] fn two_dimensional_1() { - assert!(parse::(&[ + assert!(parse::(&[ Token::Cell, Token::Name(String::from("x")), Token::At, @@ -1667,7 +1630,7 @@ mod parser_tests { #[test] fn two_dimensional_2() { - assert!(parse::(&[ + assert!(parse::(&[ Token::Cell, Token::Name(String::from("x")), Token::At, @@ -1691,7 +1654,7 @@ mod parser_tests { #[test] fn two_dimensional_3() { - assert!(parse::(&[ + assert!(parse::(&[ Token::Cell, Token::Name(String::from("xyz")), Token::At, @@ -1714,4 +1677,136 @@ mod parser_tests { } }])); } + + #[test] + fn var_v() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon + ]) + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { + var: VariableDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }])) + } + + #[test] + fn inline_bf_1() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon, + Token::Bf, + Token::OpenBrace, + Token::Plus, + Token::OpenBrace, + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon, + Token::ClosingBrace, + Token::Minus, + Token::ClosingBrace + ]) + .unwrap() + .iter() + .eq(&[ + Clause::DeclareVariable { + var: VariableDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }, + Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Add), + ExtendedOpcode::Block(vec![Clause::DeclareVariable { + var: VariableDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + 
location_specifier: LocationSpecifier::None + } + }]), + ExtendedOpcode::Opcode(Opcode2D::Subtract), + ] + } + ])) + } + + #[test] + fn inline_bf_2() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon, + Token::Bf, + Token::OpenBrace, + Token::Name(String::from("v")), + Token::OpenBrace, + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon, + Token::ClosingBrace, + Token::Caret, + Token::ClosingBrace + ]) + .unwrap() + .iter() + .eq(&[ + Clause::DeclareVariable { + var: VariableDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }, + Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Block(vec![Clause::DeclareVariable { + var: VariableDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }]), + ExtendedOpcode::Opcode(Opcode2D::Up), + ] + } + ])) + } + + #[test] + fn inline_bf_3() { + assert!(parse::(&[ + Token::Bf, + Token::OpenBrace, + Token::Name(String::from("vvvv")), + Token::MoreThan, + Token::ClosingBrace + ]) + .unwrap() + .iter() + .eq(&[Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Right), + ] + }])) + } } diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index e4204f5..440bbd7 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -4,9 +4,15 @@ #[cfg(test)] pub mod black_box_tests { use crate::{ - backend::{BrainfuckOpcodes, Opcode2D}, + backend::{ + bf::{Opcode, TapeCell}, + bf2d::{Opcode2D, TapeCell2D}, + common::{ + 
BrainfuckBuilder, BrainfuckBuilderData, BrainfuckProgram, CellAllocator, + CellAllocatorData, OpcodeVariant, TapeCellVariant, + }, + }, brainfuck::{bvm_tests::run_code, BrainfuckConfig}, - cells::{TapeCell, TapeCell2D, TapeCellVariant}, misc::{MastermindConfig, MastermindContext}, parser::parse, tokeniser::{tokenise, Token}, @@ -89,13 +95,18 @@ pub mod black_box_tests { const TESTING_BVM_MAX_STEPS: usize = 100_000_000; - fn compile_and_run>( + fn compile_and_run<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( program: String, input: String, - ) -> Result { + ) -> Result + where + BrainfuckBuilderData: BrainfuckBuilder, + CellAllocatorData<'a, TC>: CellAllocator, + Vec: BrainfuckProgram, + { let ctx = MastermindContext { config: OPT_NONE }; let tokens: Vec = tokenise(&program)?; - let clauses = parse::(&tokens)?; + let clauses = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_program = ctx.ir_to_bf(instructions, None)?; let bfs = bf_program.to_string(); @@ -109,15 +120,19 @@ pub mod black_box_tests { )) } - fn compile_program>( + fn compile_program<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( program: String, config: Option, - ) -> Result, String> { + ) -> Result, String> + where + BrainfuckBuilderData: BrainfuckBuilder, + CellAllocatorData<'a, TC>: CellAllocator, + { let ctx = MastermindContext { config: config.unwrap_or(OPT_NONE), }; let tokens: Vec = tokenise(&program)?; - let clauses = parse::(&tokens)?; + let clauses = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; @@ -129,7 +144,7 @@ pub mod black_box_tests { let program = String::from(""); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, 
output) } @@ -137,7 +152,7 @@ pub mod black_box_tests { // #[test] fn dummy_compile_fail_test() { let program = String::from(""); - let result = compile_program::(program, None); + let result = compile_program::(program, None); assert!(result.is_err()); } @@ -145,7 +160,7 @@ pub mod black_box_tests { fn dummy_code_test() { let program = String::from(""); let desired_code = String::from(""); - let code = compile_program::(program, None) + let code = compile_program::(program, None) .expect("") .to_string(); println!("{code}"); @@ -163,7 +178,7 @@ pub mod black_box_tests { let program = String::from(""); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); assert_eq!(desired_output, output) } @@ -172,7 +187,7 @@ pub mod black_box_tests { let program = String::from(";"); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); assert_eq!(desired_output, output) } @@ -181,7 +196,7 @@ pub mod black_box_tests { let program = String::from(";;;;;;"); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); assert_eq!(desired_output, output) } @@ -190,7 +205,7 @@ pub mod black_box_tests { let program = String::from(";;{;{;};};;;"); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); assert_eq!(desired_output, output) } @@ -218,7 +233,7 @@ output ten; let desired_output = String::from("hello\n"); assert_eq!( desired_output, - compile_and_run::(program, input).expect("") + compile_and_run::(program, input).expect("") ); } @@ -238,7 +253,7 @@ 
output 10; let desired_output = String::from("hello\n"); assert_eq!( desired_output, - compile_and_run::(program, input).expect("") + compile_and_run::(program, input).expect("") ) } @@ -261,7 +276,7 @@ output 70; ); let input = String::from(""); let desired_output = String::from("hello\n\n\0F"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -282,7 +297,7 @@ output "What?"; ); let input = String::from(""); let desired_output = String::from("Hello.\nWhat?"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -297,7 +312,7 @@ output ['o', '.', '\n']; ); let input = String::from(""); let desired_output = String::from("Hello.\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -311,7 +326,7 @@ output '@' + 256 + 1 + false + true + 'e' - '@'; ); let input = String::from(""); let desired_output = String::from("g"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -336,7 +351,7 @@ if q { ); let input = String::from(""); let desired_output = String::from("Hi friend!\npath b"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -368,7 +383,7 @@ if not_a - 'a' { ); let input = String::from(""); let desired_output = String::from("ACb"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) 
} @@ -390,7 +405,7 @@ output A; ); let input = String::from(""); let desired_output = String::from("666666 G"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -407,7 +422,7 @@ output '0' + x; ); let input = String::from(""); let desired_output = String::from("56"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -424,7 +439,7 @@ output '0' + x; ); let input = String::from(""); let desired_output = String::from("56"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -440,7 +455,7 @@ output '0' + x; ); let input = String::from(""); let desired_output = String::from("5;"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -457,7 +472,7 @@ output '0' + x; ); let input = String::from(""); let desired_output = String::from("26"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -473,7 +488,7 @@ output '0' + x; ); let input = String::from(""); let desired_output = String::from("3"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -493,7 +508,7 @@ output *x; ); let input = String::from(""); let desired_output = String::from("82"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); 
assert_eq!(desired_output, output) } @@ -513,7 +528,7 @@ output *x; ); let input = String::from(""); let desired_output = String::from("79"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -528,7 +543,7 @@ output x - 2; ); let input = String::from(""); let desired_output = String::from("~"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -545,7 +560,7 @@ output x + 'f' + 1; ); let input = String::from(""); let desired_output = String::from("f"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -562,7 +577,7 @@ output x + 'f'; ); let input = String::from(""); let desired_output = String::from("f"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -578,7 +593,7 @@ output x + 'f'; ); let input = String::from(""); let desired_output = String::from("f"); - let code = compile_program::(program, Some(OPT_ALL))?; + let code = compile_program::(program, Some(OPT_ALL))?; assert_eq!( desired_output, run_code(BVM_CONFIG_1D, code.to_string(), input, None) @@ -598,7 +613,7 @@ output x + 'f'; ); let input = String::from(""); let desired_output = String::from("f"); - let code = compile_program::(program, Some(OPT_ALL))?; + let code = compile_program::(program, Some(OPT_ALL))?; assert_eq!( desired_output, run_code(BVM_CONFIG_1D, code.to_string(), input, None) @@ -630,7 +645,7 @@ drain a { let desired_output = String::from("0AB\n1ABB\n2ABBB\n3ABBBB\n4ABBBBB\n5ABBBBBB\n6ABBBBBBB\n7ABBBBBBBB\n8ABBBBBBBBB\n9ABBBBBBBBBB\n"); assert_eq!( desired_output, 
- compile_and_run::(program, input).expect("") + compile_and_run::(program, input).expect("") ) } @@ -659,7 +674,7 @@ drain g into a {output a;} let desired_output = String::from("AABAA\nBBDAB\nCCGAC\nDDKAD\neefghi"); assert_eq!( desired_output, - compile_and_run::(program, input).expect("") + compile_and_run::(program, input).expect("") ) } @@ -675,7 +690,7 @@ output 'h'; let desired_output = String::from("h"); assert_eq!( desired_output, - compile_and_run::(program, input).expect("") + compile_and_run::(program, input).expect("") ) } @@ -716,7 +731,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("ACE\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -760,7 +775,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("ACE\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -779,7 +794,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("5\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -814,7 +829,7 @@ drain a { ); let input = String::from(""); let desired_output = String::from("0ABB\n1ABB\n2ABB\n3ABBBBBBBBBB\n4ABB\n5ABB\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -851,7 +866,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("010131\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); 
assert_eq!(desired_output, output) } @@ -887,7 +902,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("01231\n"); - let code = compile_program::(program, Some(OPT_NONE))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE))?.to_string(); println!("{}", code); let output = run_code(BVM_CONFIG_1D, code, input, None); println!("{output}"); @@ -961,7 +976,7 @@ fn func_2(cell[4] think, cell green) { ); let input = String::from(""); let desired_output = String::from("01202726631\n@1202726631\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -981,7 +996,7 @@ fn add_one(cell cel) { ); let input = String::from(""); let desired_output = String::from("ABCD"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1008,7 +1023,7 @@ fn add_one_to_three(cell[3] t) { ); let input = String::from(""); let desired_output = String::from("111"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1035,7 +1050,7 @@ fn add_one(cell t) { ); let input = String::from(""); let desired_output = String::from("12"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1073,7 +1088,7 @@ fn add_one(struct A t) { ); let input = String::from(""); let desired_output = String::from("12\n23"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1112,7 +1127,7 @@ fn add_one(struct A t, cell a) { ); 
let input = String::from(""); let desired_output = String::from("12\n33"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1156,7 +1171,7 @@ fn add_one(struct A tfoaishjdf, cell aaewofjas) { ); let input = String::from(""); let desired_output = String::from("12\n33"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1175,7 +1190,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("hello\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1201,7 +1216,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("hello\nhello: g\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1228,7 +1243,7 @@ output 10; ); let input = String::from(""); let desired_output = String::from("hello\nhello: g\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1245,7 +1260,7 @@ output b; ); let input = String::from("A"); let desired_output = String::from("B"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1272,7 +1287,7 @@ output b[0]; ); let input = String::from("ABC"); let desired_output = String::from("ABC\nDDD"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, 
input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1301,7 +1316,7 @@ output c; ); let input = String::from(""); let desired_output = String::from("FooFpp\nZ"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1336,7 +1351,7 @@ output *v; ); let input = String::from(""); let desired_output = String::from("hhh hh hello"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1356,7 +1371,7 @@ output *v; ); let input = String::from(""); let desired_output = String::from("Freidns\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1376,7 +1391,7 @@ output f; ); let input = String::from(""); let desired_output = String::from("fFf"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1418,7 +1433,7 @@ output g[2][3]; ); let input = String::from(""); let desired_output = String::from("543112320003"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1444,7 +1459,7 @@ output '0' + a.yellow; ); let input = String::from(""); let desired_output = String::from("0064"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1471,7 +1486,7 @@ output '0' + a.yellow; ); let input = String::from(""); let desired_output = String::from("3452"); - let output = 
compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1500,7 +1515,7 @@ output a.green; ); let input = String::from("gh"); let desired_output = String::from("hg"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1533,7 +1548,7 @@ struct AA { ); let input = String::from("gh"); let desired_output = String::from("hg"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1571,7 +1586,7 @@ struct AA { ); let input = String::from("ghpalindrome"); let desired_output = String::from("nhg"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1605,7 +1620,7 @@ output '\n'; ); let input = String::from("hellow"); let desired_output = String::from("helowl\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1633,7 +1648,7 @@ output '\n'; ); let input = String::from("gy0123"); let desired_output = String::from("0123yg\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1671,7 +1686,7 @@ output '\n'; ); let input = String::from("gy-+t"); let desired_output = String::from("t-+yg\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1695,7 +1710,7 @@ output '\n'; ); let 
input = String::from("0123a"); let desired_output = String::from("a\n"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1718,7 +1733,7 @@ output '0' + as[1].green; ); let input = String::from(""); let desired_output = String::from("53"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1741,7 +1756,7 @@ struct AAA { ); let input = String::from(""); let desired_output = String::from("53"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1778,7 +1793,7 @@ output as[1].green; ); let input = String::from("tr"); let desired_output = String::from("HI\n6tr"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1826,7 +1841,7 @@ output as[1].bbb[2].green; ); let input = String::from("abcdefgh"); let desired_output = String::from("HI\ngabchdef"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1874,7 +1889,7 @@ output as[1].bbb[2].green; ); let input = String::from("abcdefgh"); let desired_output = String::from("HI\ngabchdef"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1909,7 +1924,7 @@ bf @2 { ); let input = String::from(""); let desired_output = String::from("jkl"); - let output = compile_and_run::(program, input).expect(""); + let output = 
compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1932,7 +1947,7 @@ struct Frame f; ); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1955,7 +1970,7 @@ struct Frame f; ); let input = String::from(""); let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -1980,7 +1995,7 @@ output g.b; ); let input = String::from(""); let desired_output = String::from("ab"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2004,7 +2019,7 @@ bf @4 { ); let input = String::from(""); let desired_output = String::from("55"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2019,7 +2034,7 @@ output '0' + sizeof(cell); ); let input = String::from(""); let desired_output = String::from("1"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2034,7 +2049,7 @@ output '0' + sizeof(cell[5]); ); let input = String::from(""); let desired_output = String::from("5"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2053,7 +2068,7 @@ output '0' + sizeof(b[2]); ); let input = String::from(""); let desired_output = String::from("141"); - let 
output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2072,7 +2087,7 @@ output '0' + s; ); let input = String::from(""); let desired_output = String::from("1"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2091,7 +2106,7 @@ output '0' + s; ); let input = String::from(""); let desired_output = String::from("3"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2110,7 +2125,7 @@ output '0' + s; ); let input = String::from(""); let desired_output = String::from("6"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2130,7 +2145,7 @@ output '0' + sizeof(g); ); let input = String::from(""); let desired_output = String::from("2"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2154,7 +2169,7 @@ output '0' + sizeof(g[0].red); ); let input = String::from(""); let desired_output = String::from("115"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2175,7 +2190,7 @@ output '0' + sizeof(g[2].blue) ); let input = String::from(""); let desired_output = String::from("391"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2206,7 +2221,7 @@ output 
'0' + sizeof(g[2].blue) ); let input = String::from(""); let desired_output = String::from("23612"); - let output = compile_and_run::(program, input).expect(""); + let output = compile_and_run::(program, input).expect(""); println!("{output}"); assert_eq!(desired_output, output) } @@ -2226,7 +2241,7 @@ cell foo @3 = 2; output foo; "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2246,7 +2261,7 @@ cell foo @0 = 2; cell b = 10; "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert!(code.starts_with(">>>>>++++<<<<<++>++++++++++")); @@ -2262,7 +2277,7 @@ cell foo @0 = 2; cell b = 3; "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert!(code.starts_with(">+<++>>+++")); @@ -2279,11 +2294,11 @@ cell b = 3; "#, ); // assert_eq!( - // compile_program::(program, None).unwrap_err(), + // compile_program::(program, None).unwrap_err(), // "Location specifier @1 conflicts with another allocation" // ); // TODO: fix the need for this - assert!(compile_program::(program, None) + assert!(compile_program::(program, None) .unwrap_err() .contains("conflicts with another allocation")); } @@ -2296,7 +2311,7 @@ cell a = 'h'; bf @a {.} "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from("wxy"); @@ -2316,7 +2331,7 @@ cell[4] b; bf @a {.} "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2338,7 +2353,7 @@ bf @t.a { } "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = 
compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from("wxy"); @@ -2361,7 +2376,7 @@ bf @t { } "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from("wxy"); @@ -2382,7 +2397,7 @@ output 10; output *f; "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2400,7 +2415,7 @@ cell[4] f @8 = "xyz "; bf @f {[.>]} "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2422,7 +2437,7 @@ cell a = '5'; func(a); "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2444,7 +2459,7 @@ cell[3] a = "456"; func(a[1]); "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2470,7 +2485,7 @@ a.r[2] = '6'; func(a.r[1]); "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2496,7 +2511,7 @@ a.r[2] = '6'; func(a); "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2522,7 +2537,7 @@ a.jj.j[1] = '4'; func(a.jj.j); "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let input = String::from(""); @@ -2543,7 +2558,7 @@ a = 0; output a; "#, ); - let code = compile_program::(program, 
Some(OPT_ALL))?.to_string(); + let code = compile_program::(program, Some(OPT_ALL))?.to_string(); println!("{code}"); assert!(code.starts_with("+++++.--.")); @@ -2561,7 +2576,7 @@ a = 0; output a; "#, ); - let code = compile_program::(program, Some(OPT_ALL))?.to_string(); + let code = compile_program::(program, Some(OPT_ALL))?.to_string(); println!("{code}"); assert!(code.starts_with("++.[-].")); @@ -2578,7 +2593,7 @@ bf { } "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert_eq!( @@ -2603,7 +2618,7 @@ bf @3 { } "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert!(code.starts_with( @@ -2638,7 +2653,7 @@ bf @0 clobbers *str { assert *str equals 0; "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert!(code.starts_with(",>,>,<<[+>]<<<[.[-]>]<<<")); @@ -2670,7 +2685,7 @@ bf { } "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let output = run_code(BVM_CONFIG_1D, code, String::from("line of input\n"), None); @@ -2708,7 +2723,7 @@ bf { } "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); let output = run_code(BVM_CONFIG_1D, code, String::from("hello\n"), None); @@ -2731,7 +2746,7 @@ bf { } "#, ); - let result = compile_program::(program, None); + let result = compile_program::(program, None); assert!(result.is_err()); Ok(()) @@ -2748,7 +2763,7 @@ bf { } "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); println!("{code}"); assert_eq!(code, ",>,>,<<>>>>>+[-]<<<<<"); @@ -2761,7 +2776,7 @@ bf { bf 
{,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.} "#, ); - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None)?.to_string(); assert_eq!( code, @@ -2780,7 +2795,7 @@ bf { bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvstvv.+++.------.vv-.^^^^+.} "#, ); - let _result = compile_program::(program, None); + let _result = compile_program::(program, None); } #[test] @@ -2805,7 +2820,7 @@ output 'h'; let input = String::from(""); let desired_output = String::from("h"); - let code = compile_program::(program, Some(OPT_ALL))?; + let code = compile_program::(program, Some(OPT_ALL))?; println!("{}", code.clone().to_string()); assert_eq!( desired_output, @@ -2831,7 +2846,7 @@ output a + 3; let input = String::from(""); let desired_output = String::from("tIJ"); - let code = compile_program::(program, Some(OPT_ALL))?.to_string(); + let code = compile_program::(program, Some(OPT_ALL))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_1D, code, input, None)); @@ -2860,7 +2875,7 @@ output a + 3; memory_allocation_method: 128, enable_2d_grid: false, }; - let _code = compile_program::(program, Some(cfg)); + let _code = compile_program::(program, Some(cfg)); } #[test] @@ -2873,7 +2888,7 @@ cell b = 3; "#, ); assert_eq!( - compile_program::(program, None)?.to_string(), + compile_program::(program, None)?.to_string(), ">^^++++" ); Ok(()) @@ -2892,7 +2907,7 @@ cell b = 3; "#, ); assert_eq!( - compile_program::(program, None)?.to_string(), + compile_program::(program, None)?.to_string(), ">^^[-]+>>>>>[-]++>>>>>[-]+++<<<<<<<<<<+++" ); Ok(()) @@ -2908,7 +2923,7 @@ cell b = 3; "#, ); assert_eq!( - compile_program::(program, None).unwrap_err(), + compile_program::(program, None).unwrap_err(), "Location specifier @(1, 3) conflicts with another allocation" ); } @@ -2923,7 +2938,7 @@ cell b = 3; "#, ); assert_eq!( - compile_program::(program, None).unwrap_err(), + 
compile_program::(program, None).unwrap_err(), "Location specifier @(2, 0) conflicts with another allocation" ); } @@ -2937,7 +2952,7 @@ cell[4] b @(0, 4); "#, ); assert_eq!( - compile_program::(program, None).unwrap_err(), + compile_program::(program, None).unwrap_err(), "Location specifier @(0, 4) conflicts with another allocation" ); } @@ -2959,7 +2974,7 @@ cell j = 1; ); let desired_output = String::from("+vv+^^+>vv+^+^+"); - let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); assert_eq!(desired_output, code); Ok(()) @@ -2991,7 +3006,7 @@ output i; let input = String::from(""); let desired_output = String::from("123456789"); - let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); @@ -3009,7 +3024,7 @@ cell[4] b @(0, 4); "#, ); assert_eq!( - compile_program::(program, Some(OPT_NONE_TILES)).unwrap_err(), + compile_program::(program, Some(OPT_NONE_TILES)).unwrap_err(), "Location specifier @(0, 4) conflicts with another allocation" ); } @@ -3032,7 +3047,7 @@ output b[3]; output a; "#, ); - let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); println!("{}", code); let input = String::from(""); let desired_output = String::from("12345"); @@ -3057,7 +3072,8 @@ cell j = 1; ); let desired_output = String::from("+>+<^+>>v+<^+<^+>>>vv+<^+<^+"); - let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); + let code = + compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); assert_eq!(desired_output, code); Ok(()) @@ -3089,7 +3105,8 @@ output i; let input = String::from(""); let desired_output = String::from("123456789"); - let code = compile_program::(program, 
Some(OPT_NONE_ZIG_ZAG))?.to_string(); + let code = + compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); @@ -3107,7 +3124,7 @@ cell[4] b @(0, 4); "#, ); assert_eq!( - compile_program::(program, Some(OPT_NONE_ZIG_ZAG)).unwrap_err(), + compile_program::(program, Some(OPT_NONE_ZIG_ZAG)).unwrap_err(), "Location specifier @(0, 4) conflicts with another allocation" ); } @@ -3130,7 +3147,8 @@ output b[3]; output a; "#, ); - let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); + let code = + compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); println!("{}", code); let input = String::from(""); let desired_output = String::from("12345"); @@ -3156,7 +3174,7 @@ cell j = 1; let desired_output = String::from("^+>+v+<+<+^+^+>+>+"); // TODO: fix this, this should fail in its current state - let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); println!("{code}"); assert_eq!(desired_output, code); @@ -3189,7 +3207,7 @@ output i; let input = String::from(""); let desired_output = String::from("123456789"); - let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); println!("{}", code); assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); @@ -3207,7 +3225,7 @@ cell[4] b @(0, 4); "#, ); assert_eq!( - compile_program::(program, Some(OPT_NONE_SPIRAL)).unwrap_err(), + compile_program::(program, Some(OPT_NONE_SPIRAL)).unwrap_err(), "Location specifier @(0,4) conflicts with another allocation" ); } @@ -3230,7 +3248,7 @@ output b[3]; output a; "#, ); - let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); + let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); println!("{}", code); let input = 
String::from(""); let desired_output = String::from("12345"); From afbd34b1105eb97d6fb6ef73370c993c5a27c774 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Thu, 23 Oct 2025 23:12:33 +1100 Subject: [PATCH 20/56] Fix allocator lifetime issue and refactor black box tests --- compiler/src/backend/bf.rs | 2 +- compiler/src/backend/bf2d.rs | 2 +- compiler/src/backend/common.rs | 18 +- compiler/src/brainfuck.rs | 246 +++-- compiler/src/frontend.rs | 2 +- compiler/src/main.rs | 24 +- compiler/src/misc.rs | 2 +- compiler/src/tests.rs | 1840 ++++++++++++-------------------- compiler/src/tokeniser.rs | 2 +- 9 files changed, 888 insertions(+), 1250 deletions(-) diff --git a/compiler/src/backend/bf.rs b/compiler/src/backend/bf.rs index e8bf42e..89a68eb 100644 --- a/compiler/src/backend/bf.rs +++ b/compiler/src/backend/bf.rs @@ -47,7 +47,7 @@ impl OpcodeVariant for Opcode { } } -impl CellAllocator for CellAllocatorData<'_, TapeCell> { +impl CellAllocator for CellAllocatorData { /// Check if the desired number of cells can be allocated to the right of a given location fn check_allocatable(&mut self, location: &TapeCell, size: usize) -> bool { for k in 0..size { diff --git a/compiler/src/backend/bf2d.rs b/compiler/src/backend/bf2d.rs index 432f2c6..56b9446 100644 --- a/compiler/src/backend/bf2d.rs +++ b/compiler/src/backend/bf2d.rs @@ -113,7 +113,7 @@ impl Display for TapeCell2D { } } -impl CellAllocator for CellAllocatorData<'_, TapeCell2D> { +impl CellAllocator for CellAllocatorData { /// Check if the desired number of cells can be allocated to the right of a given location fn check_allocatable(&mut self, location: &TapeCell2D, size: usize) -> bool { for k in 0..size { diff --git a/compiler/src/backend/common.rs b/compiler/src/backend/common.rs index a99aa02..051dfac 100644 --- a/compiler/src/backend/common.rs +++ b/compiler/src/backend/common.rs @@ -15,17 +15,17 @@ use std::{ type LoopDepth = usize; type TapeValue = u8; -impl MastermindContext { - pub fn ir_to_bf<'a, TC: 
TapeCellVariant, OC: OpcodeVariant>( - &'a self, +impl<'a> MastermindContext { + pub fn ir_to_bf( + &self, instructions: Vec>, return_to_cell: Option, ) -> Result, String> where BrainfuckBuilderData: BrainfuckBuilder, - CellAllocatorData<'a, TC>: CellAllocator, + CellAllocatorData: CellAllocator, { - let mut allocator = CellAllocatorData::new(&self.config); + let mut allocator = CellAllocatorData::new(self.config.clone()); struct AllocationMapEntry { cell_base: TC, @@ -422,12 +422,12 @@ where fn from_token(token: &Token) -> Result; } -pub struct CellAllocatorData<'a, TC> { +pub struct CellAllocatorData { pub cells: HashSet, - pub config: &'a MastermindConfig, + pub config: MastermindConfig, } -impl CellAllocatorData<'_, T> { - fn new(config: &MastermindConfig) -> CellAllocatorData { +impl CellAllocatorData { + fn new(config: MastermindConfig) -> CellAllocatorData { CellAllocatorData { cells: HashSet::new(), config, diff --git a/compiler/src/brainfuck.rs b/compiler/src/brainfuck.rs index 27f9c82..86fac90 100644 --- a/compiler/src/brainfuck.rs +++ b/compiler/src/brainfuck.rs @@ -308,26 +308,25 @@ pub mod bvm_tests { pub fn run_code( config: BrainfuckConfig, - code: String, - input: String, + code: &str, + input: &str, max_steps_cutoff: Option, - ) -> String { + ) -> Result { let ctx = BrainfuckContext { config }; let input_bytes: Vec = input.bytes().collect(); let mut input_stream = Cursor::new(input_bytes); - let mut output_stream = Cursor::new(Vec::new()); + let mut output_stream = Cursor::new(vec![]); ctx.run( code.chars().collect(), &mut input_stream, &mut output_stream, max_steps_cutoff, - ) - .unwrap(); + )?; // TODO: fix this unsafe stuff - unsafe { String::from_utf8_unchecked(output_stream.into_inner()) } + Ok(unsafe { String::from_utf8_unchecked(output_stream.into_inner()) }) } const BVM_CONFIG_1D: BrainfuckConfig = BrainfuckConfig { enable_debug_symbols: false, @@ -338,152 +337,225 @@ pub mod bvm_tests { enable_2d_grid: true, }; - #[test] - fn dummy_test() 
{ - let program = String::from(""); - let input = String::from(""); - let desired_output = String::from(""); - assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, program, input, None) - ) - } - #[test] fn hello_world_1() { - let program = String::from("++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++."); - let input = String::from(""); - let desired_output = String::from("Hello World!\n"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, program, input, None) - ) + run_code( + BVM_CONFIG_1D, + "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.\ ++++.------.--------.>>+.>++.", + "", + None + ) + .unwrap(), + "Hello World!\n" + ); } #[test] fn hello_world_2() { - let program = String::from( - "+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+.", - ); - let input = String::from(""); - let desired_output = String::from("Hello, World!"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, program, input, None) + run_code( + BVM_CONFIG_1D, + "+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+.", + "", + None + ) + .unwrap(), + "Hello, World!" 
) } #[test] fn random_mess() { - let program = String::from("+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++."); - let input = String::from(""); - let desired_output = String::from("eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfijgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n"); + // test case stolen from https://code.golf assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, program, input, None) + run_code( + BVM_CONFIG_1D, + "+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+\ +.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++.", + "", + None + ) + .unwrap(), + "eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfi\ +jgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n" ) } #[test] - #[should_panic(expected = "2D Brainfuck currently disabled")] fn grid_disabled_1() { - let program = String::from("++++++++[->++++++[->+>+<<]<]>>.>^+++."); - let input = String::from(""); - run_code(BVM_CONFIG_1D, program, input, None); + assert_eq!( + run_code( + BVM_CONFIG_1D, + "++++++++[->++++++[->+>+<<]<]>>.>^+++.", + "", + None, + ) + .unwrap_err(), + "2D Brainfuck currently disabled" + ); } #[test] #[should_panic(expected = "2D Brainfuck currently disabled")] fn grid_disabled_2() { - let program = - String::from("++++++++[->^^^+++vvvv+++[->^^^^+>+^^^^^^^^>.>vvvv+++."); - let input = String::from(""); - run_code(BVM_CONFIG_1D, program, input, None); + assert_eq!( + run_code( + BVM_CONFIG_1D, + "++++++++[->^^^+++vvvv+++[->^^^^+>+^^^^^^^^>.>vvvv+++.", + "", + None, + ) + .unwrap_err(), + "2D Brainfuck currently disabled" + ); } // 2D tests: #[test] fn grid_regression_1() { - // hello world - let program = String::from("++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++."); - let input = String::from(""); - let desired_output = String::from("Hello 
World!\n"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.\ ++++.------.--------.>>+.>++.", + "", + None + ) + .unwrap(), + "Hello World!\n" ) } #[test] fn grid_regression_2() { - // random mess - let program = String::from("+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++."); - let input = String::from(""); - let desired_output = String::from("eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfijgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n"); + // test case stolen from https://code.golf assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+\ +.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++.", + "", + None + ) + .unwrap(), + "eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfi\ +jgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n" ) } #[test] fn grid_basic_1() { - let program = String::from("++++++++[-^++++++[->+v+<^]v]>+++++^.v."); - let input = String::from(""); - let desired_output = String::from("05"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "++++++++[-^++++++[->+v+<^]v]>+++++^.v.", + "", + None + ) + .unwrap(), + "05" ) } #[test] fn grid_mover_1() { - let program = String::from( - "-<<<<<<<<<<<<^^^^^^^^^^^^-<^++++++++[->>vv+[->v+]->v++++++<^<^+[-<^+]-<^]>>vv+[->v+]->v...", - ); - let input = String::from(""); - let desired_output = String::from("000"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "-<<<<<<<<<<<<^^^^^^^^^^^^-<^++++++++[->>vv+[->v+]->v++++++<^<^+[-<^+]-<^]>>vv+\ +[->v+]->v...", + "", + None + ) + 
.unwrap(), + "000", ) } #[test] fn grid_bfception_1() { - // run a hello world program within a 1d brainfuck interpreter implemented in 2d brainfuck - let program = String::from("-v>,[>,]^-<+[-<+]->+[-v------------------------------------------^>+]-<+[-<+]->+[-v[-^+^+vv]^[-v+^]^->+<[>-<->+<[>-<->+<[>-<->+<[>-<-------------->+<[>-<-->+<[>-<----------------------------->+<[>-<-->+<[>-[[-]<[-]vv[-]++++++v++^^^>]<[-]]>[[-]<[-]vv[-]+++++v+^^^>]<[-]]>[[-]<[-]vv[-]+++^^>]<[-]]>[[-]<[-]vv[-]++++^^>]<[-]]>[[-]<[-]vv[-]+++++++^^>]<[-]]>[[-]<[-]vv[-]++^^>]<[-]]>[[-]<[-]vv[-]++++++++^^>]<[-]]>[[-]<[-]vv[-]+^^>]+]-v-v-v-v-^^^^<+[-<+]<->v-v-v<-v->^^^^>vvv+^^^<+>+[-<->+v[-^^+^+vvv]^^[-vv+^^]^>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<[-]]>[--[+>-]+v,^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v.^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v[[-]^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]<+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]+vvv]^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v>+<[>-<[-]]>[-<^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]>+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]+vvv>]<^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+<<-v-^>+v+^[<+v+^>-v-^]+>-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+>>-v-^<+v+^[>+v+^<-v-^]+<-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v-^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v+^+[-<+]-<^^^+[->+]->-[+>-]+^^>]+]-"); - let input = String::from("++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++.\n"); - let desired_output = String::from("Hello World!\n"); + // hello world run inside a brainfuck interpreter written in 2d brainfuck assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "-v>,[>,]^-<+[-<+]->+[-v------------------------------------------^>+]-<+[-<+]\ 
+->+[-v[-^+^+vv]^[-v+^]^->+<[>-<->+<[>-<->+<[>-<->+<[>-<-------------->+<[>-<-->\ ++<[>-<----------------------------->+<[>-<-->+<[>-[[-]<[-]vv[-]+++\ ++++v++^^^>]<[-]]>[[-]<[-]vv[-]+++++v+^^^>]<[-]]>[[-]<[-]vv[-]+++^^>]<[-]]>[[-]<\ +[-]vv[-]++++^^>]<[-]]>[[-]<[-]vv[-]+++++++^^>]<[-]]>[[-]<[-]vv[-]++^^>]<[-]]>[[\ +-]<[-]vv[-]++++++++^^>]<[-]]>[[-]<[-]vv[-]+^^>]+]-v-v-v-v-^^^^<+[-<+]<->v-\ +v-v<-v->^^^^>vvv+^^^<+>+[-<->+v[-^^+^+vvv]^^[-vv+^^]^>+<-[>[-]<>+<-[>[-]<>+<-[>\ +[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<[-]]>[--[+>\ +-]+v,^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v.^+[-<+]-<^^^+[-\ +>+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v[[-]^^^+[-<+]-^^^+[\ +->+]-<+[>>-[+>-]<+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]\ ++vvv]^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+\ +v]v>+<[>-<[-]]>[-<^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]>+vv[-^^^+^+vvvv]^^^[-vvv+^^^]\ +^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]\ +vvv+[-<+]->-[+>-]+vvv>]<^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[-<\ +vvvvv+[-<+]->-[+>-]+<<-v-^>+v+^[<+v+^>-v-^]+>-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>\ +[--[+>-]+>>-v-^<+v+^[>+v+^<-v-^]+<-+[-<+]-<^^^+[->+]->-[+>-]+^^>]\ +<]>[--[+>-]+v-^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--\ +[+>-]+v+^+[-<+]-<^^^+[->+]->-[+>-]+^^>]+]-", + "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]>>.>---.+++++++..+++.>>.<-.<.\ ++++.------.--------.>>+.>++.\n", + None + ) + .unwrap(), + "Hello World!\n" ) } #[test] fn grid_bfception_2() { - // random mess - let program = 
String::from("-v>,[>,]^-<+[-<+]->+[-v------------------------------------------^>+]-<+[-<+]->+[-v[-^+^+vv]^[-v+^]^->+<[>-<->+<[>-<->+<[>-<->+<[>-<-------------->+<[>-<-->+<[>-<----------------------------->+<[>-<-->+<[>-[[-]<[-]vv[-]++++++v++^^^>]<[-]]>[[-]<[-]vv[-]+++++v+^^^>]<[-]]>[[-]<[-]vv[-]+++^^>]<[-]]>[[-]<[-]vv[-]++++^^>]<[-]]>[[-]<[-]vv[-]+++++++^^>]<[-]]>[[-]<[-]vv[-]++^^>]<[-]]>[[-]<[-]vv[-]++++++++^^>]<[-]]>[[-]<[-]vv[-]+^^>]+]-v-v-v-v-^^^^<+[-<+]<->v-v-v<-v->^^^^>vvv+^^^<+>+[-<->+v[-^^+^+vvv]^^[-vv+^^]^>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<[-]]>[--[+>-]+v,^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v.^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v[[-]^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]<+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]+vvv]^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v>+<[>-<[-]]>[-<^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]>+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]+vvv>]<^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+<<-v-^>+v+^[<+v+^>-v-^]+>-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+>>-v-^<+v+^[>+v+^<-v-^]+<-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v-^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v+^+[-<+]-<^^^+[->+]->-[+>-]+^^>]+]-"); - let input = String::from("+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++.\n"); - let desired_output = String::from("eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfijgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n"); + // random mess test from https://code.golf run in brainfuck interpreter written in 2d brainfuck assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + "-v>,[>,]^-<+[-<+]->+[-v------------------------------------------^>+]-<+[-<+]-\ 
+>+[-v[-^+^+vv]^[-v+^]^->+<[>-<->+<[>-<->+<[>-<->+<[>-<-------------->+<[>-<-->+\ +<[>-<----------------------------->+<[>-<-->+<[>-[[-]<[-]vv[-]++++\ +++v++^^^>]<[-]]>[[-]<[-]vv[-]+++++v+^^^>]<[-]]>[[-]<[-]vv[-]+++^^>]<[-]]>[[-]<[\ +-]vv[-]++++^^>]<[-]]>[[-]<[-]vv[-]+++++++^^>]<[-]]>[[-]<[-]vv[-]++^^>]<[-]]>[[-\ +]<[-]vv[-]++++++++^^>]<[-]]>[[-]<[-]vv[-]+^^>]+]-v-v-v-v-^^^^<+[-<+]<->v-v\ +-v<-v->^^^^>vvv+^^^<+>+[-<->+v[-^^+^+vvv]^^[-vv+^^]^>+<-[>[-]<>+<-[>[-]<>+<-[>[\ +-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<>+<-[>[-]<[-]]>[--[+>-\ +]+v,^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v.^+[-<+]-<^^^+[->\ ++]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v]v[[-]^^^+[-<+]-^^^+[-\ +>+]-<+[>>-[+>-]<+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]vvv+[-<+]->-[+>-]+\ +vvv]^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+v[-v+v+^^]v[-^+v\ +]v>+<[>-<[-]]>[-<^^^+[-<+]-^^^+[->+]-<+[>>-[+>-]>+vv[-^^^+^+vvvv]^^^[-vvv+^^^]^\ +->+<[>-<->+<[>-<[-]]>[->-[+>-]+^^>]<]>[->-[+>-]+^^>]<\ +vv+[-<+]-<][-]>vvv+[-<+]->-[+>-]+vvv>]<^^^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[+>-]+<<-v-^>+v+^[<+v+^>-v-^]+>-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[\ +--[+>-]+>>-v-^<+v+^[>+v+^<-v-^]+<-+[-<+]-<^^^+[->+]->-[+>-]+^^>]<\ +]>[--[+>-]+v-^+[-<+]-<^^^+[->+]->-[+>-]+^^>]<]>[--[\ ++>-]+v+^+[-<+]-<^^^+[->+]->-[+>-]+^^>]+]-", + "+++++[>+++++[>++>++>+++>+++>++++>++++<<<<<<-]<-]+++++[>>[>]<[+.<<]>[++.>>>]<[+\ +.<]>[-.>>]<[-.<<<]>[.>]<[+.<]<-]++++++++++.\n", + None + ) + .unwrap(), + "eL34NfeOL454KdeJ44JOdefePK55gQ67ShfTL787KegJ77JTeghfUK88iV9:XjgYL:;:KfiJ::JYfi\ +jgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n" ) } #[test] fn test_bf2d_code() { - let program = String::from( - ",.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.", - ); - let input = String::from(""); - let desired_output = String::from("\0Hello, World!"); assert_eq!( - desired_output, - run_code(BVM_CONFIG_2D, program, input, None) + run_code( + BVM_CONFIG_2D, + 
",.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.", + "", + None + ) + .unwrap(), + "\0Hello, World!" ) } } diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index be600e6..fb78460 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -23,7 +23,7 @@ impl MastermindContext { ) -> Result, String> where BrainfuckBuilderData: BrainfuckBuilder, - CellAllocatorData<'a, TC>: CellAllocator, + CellAllocatorData: CellAllocator, { let mut scope = if let Some(outer) = outer_scope { outer.open_inner() diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 3076cfe..c4f393b 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -14,6 +14,11 @@ mod preprocessor; mod tests; mod tokeniser; use crate::{ + backend::{ + bf::{Opcode, TapeCell}, + bf2d::{Opcode2D, TapeCell2D}, + common::BrainfuckProgram, + }, brainfuck::{BrainfuckConfig, BrainfuckContext}, misc::{MastermindConfig, MastermindContext}, parser::parse, @@ -95,20 +100,23 @@ fn main() -> Result<(), String> { true => { // compile the provided file let tokens = tokenise(&program)?; - let bf_code = if ctx.config.enable_2d_grid { + if ctx.config.enable_2d_grid { let parsed_syntax = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); - ctx.ir_to_bf(instructions, None)? + let bf_code = ctx.ir_to_bf(instructions, None)?; + bf_code.to_string() } else { let parsed_syntax = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); - ctx.ir_to_bf(instructions, None)? 
- }; - - match ctx.config.optimise_generated_code { - true => ctx.optimise_bf_code(bf_code).to_string(), - false => bf_code.to_string(), + let bf_code = ctx.ir_to_bf(instructions, None)?; + bf_code.to_string() } + + // TODO: fix optimisations + // match ctx.config.optimise_generated_code { + // true => ctx.optimise_bf_code(bf_code).to_string(), + // false => bf_code.to_string(), + // } } false => program, }; diff --git a/compiler/src/misc.rs b/compiler/src/misc.rs index dfb2b21..17c1774 100644 --- a/compiler/src/misc.rs +++ b/compiler/src/misc.rs @@ -1,4 +1,4 @@ -#[derive(serde::Deserialize)] +#[derive(Clone, serde::Deserialize)] pub struct MastermindConfig { // basic pure brainfuck optimisations pub optimise_generated_code: bool, diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index 440bbd7..ed17789 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -96,42 +96,37 @@ pub mod black_box_tests { const TESTING_BVM_MAX_STEPS: usize = 100_000_000; fn compile_and_run<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( - program: String, - input: String, + program: &str, + input: &str, ) -> Result where BrainfuckBuilderData: BrainfuckBuilder, - CellAllocatorData<'a, TC>: CellAllocator, + CellAllocatorData: CellAllocator, Vec: BrainfuckProgram, { let ctx = MastermindContext { config: OPT_NONE }; - let tokens: Vec = tokenise(&program)?; + let tokens: Vec = tokenise(program)?; let clauses = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_program = ctx.ir_to_bf(instructions, None)?; let bfs = bf_program.to_string(); // run generated brainfuck with input - Ok(run_code( - BVM_CONFIG_1D, - bfs, - input, - Some(TESTING_BVM_MAX_STEPS), - )) + run_code(BVM_CONFIG_1D, &bfs, input, Some(TESTING_BVM_MAX_STEPS)) } fn compile_program<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( - program: String, + program: &str, config: Option, ) -> Result, String> where BrainfuckBuilderData: 
BrainfuckBuilder, - CellAllocatorData<'a, TC>: CellAllocator, + CellAllocatorData: CellAllocator, { let ctx = MastermindContext { config: config.unwrap_or(OPT_NONE), }; - let tokens: Vec = tokenise(&program)?; + let tokens: Vec = tokenise(program)?; let clauses = parse::(&tokens)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; @@ -139,80 +134,35 @@ pub mod black_box_tests { Ok(bf_code) } - // #[test] - fn dummy_success_test() { - let program = String::from(""); - let input = String::from(""); - let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) - } - - // #[test] - fn dummy_compile_fail_test() { - let program = String::from(""); - let result = compile_program::(program, None); - assert!(result.is_err()); - } - - // #[test] - fn dummy_code_test() { - let program = String::from(""); - let desired_code = String::from(""); - let code = compile_program::(program, None) - .expect("") - .to_string(); - println!("{code}"); - assert_eq!(desired_code, code); - - let input = String::from(""); - let desired_output = String::from(""); - let output = run_code(BVM_CONFIG_1D, code, input, None); - println!("{output}"); - assert_eq!(desired_output, output) - } - #[test] fn empty_program_1() { - let program = String::from(""); - let input = String::from(""); - let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); - assert_eq!(desired_output, output) + assert_eq!(compile_and_run::("", "").unwrap(), ""); } #[test] fn empty_program_2() { - let program = String::from(";"); - let input = String::from(""); - let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); - assert_eq!(desired_output, output) + assert_eq!(compile_and_run::(";", "").unwrap(), ""); } #[test] fn empty_program_3() { - let program = 
String::from(";;;;;;"); - let input = String::from(""); - let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); - assert_eq!(desired_output, output) + assert_eq!( + compile_and_run::(";;;;;;", "").unwrap(), + "" + ); } #[test] fn empty_program_4() { - let program = String::from(";;{;{;};};;;"); - let input = String::from(""); - let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); - assert_eq!(desired_output, output) + assert_eq!( + compile_and_run::(";;{;{;};};;;", "").unwrap(), + "" + ); } #[test] fn hello_1() { - let program = String::from( - " + let program = r#" cell h = 8; cell e = 5; cell l = 12; @@ -227,40 +177,32 @@ output l; output o; cell ten = 10; output ten; - ", - ); - let input = String::from(""); - let desired_output = String::from("hello\n"); +"#; assert_eq!( - desired_output, - compile_and_run::(program, input).expect("") + compile_and_run::(program, "").unwrap(), + "hello\n" ); } #[test] fn hello_2() { - let program = String::from( - " + let program = r#" output 'h'; output 'e'; output 'l'; output 'l'; output 'o'; output 10; - ", - ); - let input = String::from(""); - let desired_output = String::from("hello\n"); +"#; assert_eq!( - desired_output, - compile_and_run::(program, input).expect("") - ) + compile_and_run::(program, "").unwrap(), + "hello\n" + ); } #[test] fn hello_3() { - let program = String::from( - r#" + let program = r#" output 'h' ;;; // comment cell[5] EEL = "ello\n"; @@ -272,19 +214,16 @@ output EEL[4]; output '\n'; output 0; output 70; - "#, - ); - let input = String::from(""); - let desired_output = String::from("hello\n\n\0F"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hello\n\n\0F" + ) } #[test] fn hello_4() { - let program = String::from( - r#" + let program = r#" cell[4] str = [5, 12, 12, 
15]; cell a = 'a' - 1; drain a into *str; @@ -293,48 +232,39 @@ output *str; output 46; output 10; output "What?"; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Hello.\nWhat?" ); - let input = String::from(""); - let desired_output = String::from("Hello.\nWhat?"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn hello_5() { - let program = String::from( - r#" + let program = r#" output "Hell"; output ['o', '.', '\n']; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Hello.\n" ); - let input = String::from(""); - let desired_output = String::from("Hello.\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn expressions_1() { - let program = String::from( - r#" + let program = r#" output '@' + 256 + 1 + false + true + 'e' - '@'; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "g" ); - let input = String::from(""); - let desired_output = String::from("g"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn expressions_2() { - let program = String::from( - r#" + let program = r#" cell p = 9 - (true + true -(-7)); if not p { output "Hi friend!\n"; @@ -347,19 +277,16 @@ if q { } else { output "path b"; } - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Hi friend!\npath b" ); - let input = String::from(""); - let desired_output = String::from("Hi friend!\npath b"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn expressions_3() { - let program = String::from( - r#" + let program = r#" if 56 - 7 { output 'A'; } else { @@ -379,19 +306,16 @@ if not_a - 'a' { } else { output 'F'; } - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + 
"ACb" ); - let input = String::from(""); - let desired_output = String::from("ACb"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn expressions_4() { - let program = String::from( - r#" + let program = r#" cell x = 5; cell A = 'A'; @@ -401,102 +325,84 @@ drain 0 + x + 1 into A { output ' '; output A; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "666666 G" ); - let input = String::from(""); - let desired_output = String::from("666666 G"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_1() { - let program = String::from( - r#" + let program = r#" cell x = 5; output '0' + x; x += 1; output '0' + x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "56" ); - let input = String::from(""); - let desired_output = String::from("56"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_2() { - let program = String::from( - r#" + let program = r#" cell x = 5; output '0' + x; x = x + 1; output '0' + x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "56" ); - let input = String::from(""); - let desired_output = String::from("56"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_3() { - let program = String::from( - r#" + let program = r#" cell x = 5; output '0' + x; x += 1 + x; output '0' + x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "5;" ); - let input = String::from(""); - let desired_output = String::from("5;"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_4() { - let program = String::from( - r#" + let 
program = r#" cell x = 2; output '0' + x; x = x + x + x; output '0' + x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "26" ); - let input = String::from(""); - let desired_output = String::from("26"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_5() { - let program = String::from( - r#" + let program = r#" cell x = 2; x = (2 + 3) - ((x + 4) + 1) + 4 - (12) + (3 + 10); output '0' + x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "3" ); - let input = String::from(""); - let desired_output = String::from("3"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_6() { - let program = String::from( - r#" + let program = r#" cell[2] x = [4, 5]; x[0] = x[0] + 4; x[1] = x[1] - 3; @@ -504,19 +410,16 @@ x[1] = x[1] - 3; x[0] += '0'; x[1] += '0'; output *x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "82" ); - let input = String::from(""); - let desired_output = String::from("82"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_7() { - let program = String::from( - r#" + let program = r#" cell[2] x = [1, 2]; x[0] = x[1] + 5; // 7 x[1] = x[0] + x[1]; // 9 @@ -524,107 +427,85 @@ x[1] = x[0] + x[1]; // 9 x[0] += '0'; x[1] += '0'; output *x; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "79" ); - let input = String::from(""); - let desired_output = String::from("79"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_8() { - let program = String::from( - r#" + let program = r#" cell x = 128; output x - 2; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "~" ); - let input = 
String::from(""); - let desired_output = String::from("~"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_8a() { - let program = String::from( - r#" + let program = r#" cell x = 127; cell y = 64; x += y + y; output x + 'f' + 1; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "f" ); - let input = String::from(""); - let desired_output = String::from("f"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn assignments_8b() { - let program = String::from( - r#" + let program = r#" cell x = 128; cell y = 64; x += y + y; output x + 'f'; - "#, - ); - let input = String::from(""); - let desired_output = String::from("f"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "f" + ) } #[test] fn assignments_9() -> Result<(), String> { - let program = String::from( - r#" + let program = r#" cell x = 128; x += 128; output x + 'f'; - "#, - ); - let input = String::from(""); - let desired_output = String::from("f"); - let code = compile_program::(program, Some(OPT_ALL))?; - assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, code.to_string(), input, None) - ); +"#; + let code = compile_program::(program, Some(OPT_ALL))?.to_string(); + println!("{code}"); + assert!(code.len() < 200); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None)?, "f"); Ok(()) } #[test] fn assignments_9a() -> Result<(), String> { - let program = String::from( - r#" + let program = r#" cell x = 126; x += 2; x += 128; output x + 'f'; - "#, - ); - let input = String::from(""); - let desired_output = String::from("f"); - let code = compile_program::(program, Some(OPT_ALL))?; - assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, code.to_string(), input, None) - ); 
+"#; + let code = compile_program::(program, Some(OPT_ALL))?.to_string(); + println!("{code}"); + assert!(code.len() < 200); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None)?, "f"); Ok(()) } #[test] fn loops_1() { - let program = String::from( - " + let program = r#" cell n = '0'; cell a = 10; cell b = 1; @@ -639,20 +520,17 @@ drain a { b += 1; output 10; }; - ", - ); - let input = String::from(""); - let desired_output = String::from("0AB\n1ABB\n2ABBB\n3ABBBB\n4ABBBBB\n5ABBBBBB\n6ABBBBBBB\n7ABBBBBBBB\n8ABBBBBBBBB\n9ABBBBBBBBBB\n"); +"#; assert_eq!( - desired_output, - compile_and_run::(program, input).expect("") + compile_and_run::(program, "").unwrap(), + "0AB\n1ABB\n2ABBB\n3ABBBB\n4ABBBBB\n5ABBBBBB\n6ABBBBBBB\n7ABBBBBBBB\n8ABBBBBBBB\ +B\n9ABBBBBBBBBB\n" ) } #[test] fn loops_2() { - let program = String::from( - " + let program = r#" cell a = 4; cell[6] b = [65, 65, 65, 65, 65, 1]; copy a into b[0] b[1] b[4] b[5] { @@ -668,36 +546,28 @@ copy a into b[0] b[1] b[4] b[5] { cell g = 5; drain g into a {output a;} - ", - ); - let input = String::from(""); - let desired_output = String::from("AABAA\nBBDAB\nCCGAC\nDDKAD\neefghi"); +"#; assert_eq!( - desired_output, - compile_and_run::(program, input).expect("") + compile_and_run::(program, "").expect(""), + "AABAA\nBBDAB\nCCGAC\nDDKAD\neefghi" ) } #[test] fn loops_3() { - let program = String::from( - " + let program = r#" drain 40; output 'h'; - ", - ); - let input = String::from(""); - let desired_output = String::from("h"); +"#; assert_eq!( - desired_output, - compile_and_run::(program, input).expect("") + compile_and_run::(program, "").expect(""), + "h" ) } #[test] fn ifs_1() { - let program = String::from( - " + let program = r#" cell x = 7; cell y = 9; @@ -727,19 +597,16 @@ if not z { }; output 10; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "ACE\n" ); - let input = String::from(""); - let desired_output = String::from("ACE\n"); - let output = compile_and_run::(program, 
input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn ifs_2() { - let program = String::from( - " + let program = r#" cell x = 7; cell y = 9; @@ -771,38 +638,32 @@ if not z { } output 10; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "ACE\n" ); - let input = String::from(""); - let desired_output = String::from("ACE\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn ifs_3() { - let program = String::from( - " + let program = r#" cell a = 5; if a { cell b = a + '0'; output b; } output 10; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "5\n" ); - let input = String::from(""); - let desired_output = String::from("5\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn loops_and_ifs_1() { - let program = String::from( - " + let program = r#" cell n = '0'; cell a = 6; cell b; @@ -825,19 +686,16 @@ drain a { b += 1; output 10; }; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "0ABB\n1ABB\n2ABB\n3ABBBBBBBBBB\n4ABB\n5ABB\n" ); - let input = String::from(""); - let desired_output = String::from("0ABB\n1ABB\n2ABB\n3ABBBBBBBBBB\n4ABB\n5ABB\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_1() { - let program = String::from( - " + let program = r#" cell global_var = '0'; fn func_0(cell grape) { @@ -862,19 +720,16 @@ output global_var; output global_var; output 10; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "010131\n" ); - let input = String::from(""); - let desired_output = String::from("010131\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_2() -> Result<(), String> { - let 
program = String::from( - " + let program = r#" cell global_var = '0'; fn func_0(cell grape) { @@ -898,23 +753,18 @@ func_0(global_var); output global_var; output 10; - ", - ); - let input = String::from(""); - let desired_output = String::from("01231\n"); - let code = compile_program::(program, Some(OPT_NONE))?.to_string(); - println!("{}", code); - let output = run_code(BVM_CONFIG_1D, code, input, None); - println!("{output}"); - assert_eq!(desired_output, output); - +"#; + let code = compile_program::(program, None) + .unwrap() + .to_string(); + println!("{code}"); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None)?, "01231\n"); Ok(()) } #[test] fn functions_3() { - let program = String::from( - " + let program = r#" cell global_var = '0'; cell[2] global_vars = ['0', 64]; @@ -972,19 +822,16 @@ fn func_2(cell[4] think, cell green) { // output green; // green = 0; }; - ", +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "01202726631\n@1202726631\n" ); - let input = String::from(""); - let desired_output = String::from("01202726631\n@1202726631\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_3a() { - let program = String::from( - r#" + let program = r#" cell[4] a = "AACD"; add_one(a[1]); output *a; @@ -992,19 +839,16 @@ output *a; fn add_one(cell cel) { ++cel; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "ABCD" ); - let input = String::from(""); - let desired_output = String::from("ABCD"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_3b() { - let program = String::from( - r#" + let program = r#" struct A {cell[3] arr;}; struct A a; a.arr[0] = '0'; @@ -1019,19 +863,16 @@ fn add_one_to_three(cell[3] t) { t[1] += 1; t[2] += 1; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "111" ); - let input = 
String::from(""); - let desired_output = String::from("111"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_3c() { - let program = String::from( - r#" + let program = r#" struct A {cell b; cell c;}; struct A a; a.b = '0'; @@ -1046,19 +887,16 @@ output a.c; fn add_one(cell t) { ++t; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "12" ); - let input = String::from(""); - let desired_output = String::from("12"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_3d() { - let program = String::from( - r#" + let program = r#" struct A {cell b; cell c;}; struct A a; a.b = '0'; @@ -1084,19 +922,16 @@ fn add_one(struct A t) { ++t.b; ++t.c; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "12\n23" ); - let input = String::from(""); - let desired_output = String::from("12\n23"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_3e() { - let program = String::from( - r#" + let program = r#" struct A {cell b; cell c;}; struct A a; a.b = '0'; @@ -1123,20 +958,17 @@ fn add_one(struct A t, cell a) { ++t.c; ++a; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "12\n33" ); - let input = String::from(""); - let desired_output = String::from("12\n33"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] #[should_panic] fn functions_3f() { - let program = String::from( - r#" + let program = r#" struct A {cell b; cell c;}; struct A a; a.b = '0'; @@ -1167,38 +999,32 @@ fn add_one(struct A t, cell a) { fn add_one(struct A tfoaishjdf, cell aaewofjas) { output "hello"; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "12\n33" ); - let input 
= String::from(""); - let desired_output = String::from("12\n33"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn functions_4() { - let program = String::from( - r#" + let program = r#" fn hello() { output "hello"; } hello(); output 10; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hello\n" ); - let input = String::from(""); - let desired_output = String::from("hello\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn function_overloads_1() { - let program = String::from( - r#" + let program = r#" fn hello(cell h) { output "hello: "; output h; @@ -1212,19 +1038,16 @@ output 10; cell g = 'g'; hello(g); output 10; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hello\nhello: g\n" ); - let input = String::from(""); - let desired_output = String::from("hello\nhello: g\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn function_overloads_1a() { - let program = String::from( - r#" + let program = r#" fn hello() { output "hello"; } @@ -1239,36 +1062,30 @@ output 10; cell g = 'g'; hello(g); output 10; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hello\nhello: g\n" ); - let input = String::from(""); - let desired_output = String::from("hello\nhello: g\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn input_1() { - let program = String::from( - " + let program = r#" cell b; input b; ++b; output b; -", +"#; + assert_eq!( + compile_and_run::(program, "A").unwrap(), + "B" ); - let input = String::from("A"); - let desired_output = String::from("B"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - 
assert_eq!(desired_output, output) } #[test] fn input_2() { - let program = String::from( - r#" + let program = r#" cell[3] b; input b[0]; input b[1]; @@ -1283,19 +1100,16 @@ b[2]+=1; output b[2]; output b[1]; output b[0]; -"#, +"#; + assert_eq!( + compile_and_run::(program, "ABC").unwrap(), + "ABC\nDDD" ); - let input = String::from("ABC"); - let desired_output = String::from("ABC\nDDD"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn memory_1() { - let program = String::from( - r#" + let program = r#" cell[3] b = "Foo"; fn inc(cell h, cell g) { @@ -1312,19 +1126,16 @@ output 10; cell c = -1; inc(c, c); output c; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "FooFpp\nZ" ); - let input = String::from(""); - let desired_output = String::from("FooFpp\nZ"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn memory_2() { - let program = String::from( - r#" + let program = r#" cell[3] b = [1, 2, 3]; fn drain_h(cell h) { @@ -1347,19 +1158,16 @@ cell u = 'a' - 1; cell[5] v = [8, 5, 12, 12, 15]; drain_into(u, v); output *v; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hhh hh hello" ); - let input = String::from(""); - let desired_output = String::from("hhh hh hello"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn blocks_1() { - let program = String::from( - r#" + let program = r#" {{{{{{{ cell g = 0 + 5 + (-(-5)); output "Freidns"; @@ -1367,19 +1175,16 @@ output *v; output g; } }}}}}}} -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Freidns\n" ); - let input = String::from(""); - let desired_output = String::from("Freidns\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } 
#[test] fn blocks_2() { - let program = String::from( - r#" + let program = r#" cell f = 'f'; output f; { @@ -1387,19 +1192,16 @@ output f; output f; } output f; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "fFf" ); - let input = String::from(""); - let desired_output = String::from("fFf"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn dimensional_arrays_1() { - let program = String::from( - r#" + let program = r#" cell[4][3] g; g[0][0] = 5 + '0'; g[0][1] = 4 + '0'; @@ -1429,19 +1231,16 @@ output g[2][0]; output g[2][1]; output g[2][2]; output g[2][3]; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "543112320003" ); - let input = String::from(""); - let desired_output = String::from("543112320003"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_1() { - let program = String::from( - r#" + let program = r#" struct AA { cell green; cell yellow; @@ -1455,19 +1254,16 @@ a.green = 6; a.yellow = 4; output '0' + a.green; output '0' + a.yellow; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "0064" ); - let input = String::from(""); - let desired_output = String::from("0064"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_2() { - let program = String::from( - r#" + let program = r#" struct AA { cell green; cell yellow; @@ -1482,19 +1278,16 @@ a.green = 5; a.yellow = 2; output '0' + a.green; output '0' + a.yellow; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "3452" ); - let input = String::from(""); - let desired_output = String::from("3452"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_3() { - let program = 
String::from( - r#" + let program = r#" struct AA { cell green; cell yellow; @@ -1511,19 +1304,16 @@ input_AA(a); output a.yellow; output a.green; - "#, +"#; + assert_eq!( + compile_and_run::(program, "gh").unwrap(), + "hg" ); - let input = String::from("gh"); - let desired_output = String::from("hg"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_3a() { - let program = String::from( - r#" + let program = r#" struct AA a; fn input_AA(struct AA bbb) { @@ -1544,19 +1334,16 @@ struct AA { cell green; cell yellow; } - "#, +"#; + assert_eq!( + compile_and_run::(program, "gh").unwrap(), + "hg" ); - let input = String::from("gh"); - let desired_output = String::from("hg"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_3b() { - let program = String::from( - r#" + let program = r#" struct AA a; fn input_AA(struct AA bbb) { @@ -1582,19 +1369,16 @@ struct AA { cell green; cell yellow; } - "#, +"#; + assert_eq!( + compile_and_run::(program, "ghpalindrome").unwrap(), + "nhg" ); - let input = String::from("ghpalindrome"); - let desired_output = String::from("nhg"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_4a() { - let program = String::from( - r#" + let program = r#" struct AA a; input a.green; input a.yellow; @@ -1616,19 +1400,16 @@ output a.reds[1]; output a.reds[2]; output a.reds[3]; output '\n'; - "#, +"#; + assert_eq!( + compile_and_run::(program, "hellow").unwrap(), + "helowl\n" ); - let input = String::from("hellow"); - let desired_output = String::from("helowl\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_4b() { - let program = String::from( - r#" + let program = r#" struct AA a; 
input a.green; input a.yellow; @@ -1644,19 +1425,16 @@ output *a.reds; output a.yellow; output a.green; output '\n'; - "#, +"#; + assert_eq!( + compile_and_run::(program, "gy0123").unwrap(), + "0123yg\n" ); - let input = String::from("gy0123"); - let desired_output = String::from("0123yg\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_4c() { - let program = String::from( - r#" + let program = r#" struct AA a; input a.green; input a.yellow; @@ -1682,20 +1460,17 @@ output *a.sub.blues; output a.yellow; output a.green; output '\n'; - "#, +"#; + assert_eq!( + compile_and_run::(program, "gy-+t").unwrap(), + "t-+yg\n" ); - let input = String::from("gy-+t"); - let desired_output = String::from("t-+yg\n"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] #[should_panic] fn structs_4d() { - let program = String::from( - r#" + let program = r#" struct AA a; input *a.reds; @@ -1706,19 +1481,15 @@ struct AA { output a.reds[4]; output '\n'; - "#, - ); - let input = String::from("0123a"); - let desired_output = String::from("a\n"); - let output = compile_and_run::(program, input).expect(""); +"#; + let output = compile_and_run::(program, "0123a").expect(""); println!("{output}"); - assert_eq!(desired_output, output) + assert_eq!(output, "a\n"); } #[test] fn structs_5() { - let program = String::from( - r#" + let program = r#" struct AA { cell green; } @@ -1729,19 +1500,16 @@ as[1].green = 3; output '0' + as[0].green; output '0' + as[1].green; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "53" ); - let input = String::from(""); - let desired_output = String::from("53"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_5a() { - let program = String::from( - r#" + let program = r#" 
struct AAA[2] as; as[0].green = 5; as[1].green = 3; @@ -1752,19 +1520,16 @@ output '0' + as[1].green; struct AAA { cell green; } -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "53" ); - let input = String::from(""); - let desired_output = String::from("53"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_6() { - let program = String::from( - r#" + let program = r#" struct AA { cell green; } @@ -1789,19 +1554,15 @@ input_AAs(as); output '0' + b.green; output as[0].green; output as[1].green; - "#, - ); - let input = String::from("tr"); - let desired_output = String::from("HI\n6tr"); - let output = compile_and_run::(program, input).expect(""); +"#; + let output = compile_and_run::(program, "tr").expect(""); println!("{output}"); - assert_eq!(desired_output, output) + assert_eq!(output, "HI\n6tr"); } #[test] fn structs_7() { - let program = String::from( - r#" + let program = r#" struct BB { cell green; } @@ -1837,19 +1598,15 @@ output as[1].green; output as[1].bbb[0].green; output as[1].bbb[1].green; output as[1].bbb[2].green; - "#, - ); - let input = String::from("abcdefgh"); - let desired_output = String::from("HI\ngabchdef"); - let output = compile_and_run::(program, input).expect(""); +"#; + let output = compile_and_run::(program, "abcdefgh").expect(""); println!("{output}"); - assert_eq!(desired_output, output) + assert_eq!(output, "HI\ngabchdef"); } #[test] fn structs_7a() { - let program = String::from( - r#" + let program = r#" struct BB { cell green @2; } @@ -1885,19 +1642,15 @@ output as[1].green; output as[1].bbb[0].green; output as[1].bbb[1].green; output as[1].bbb[2].green; - "#, - ); - let input = String::from("abcdefgh"); - let desired_output = String::from("HI\ngabchdef"); - let output = compile_and_run::(program, input).expect(""); +"#; + let output = compile_and_run::(program, "abcdefgh").expect(""); println!("{output}"); - 
assert_eq!(desired_output, output) + assert_eq!(output, "HI\ngabchdef"); } #[test] fn structs_bf_1() { - let program = String::from( - r#" + let program = r#" struct Frame { cell marker @3; cell value @0; @@ -1920,22 +1673,16 @@ vec1.frames[2].value = 'l'; bf @2 { [>.>>>] } - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "jkl" ); - let input = String::from(""); - let desired_output = String::from("jkl"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] - // TODO: fix the r_panic macro that makes this error have unescaped quotes in it (weird) - // #[should_panic(expected = r#"Subfields "marker" and "temp_cells" overlap in struct."#)] - #[should_panic] fn structs_bf_1a() { - let program = String::from( - r#" + let program = r#" struct Frame { cell marker @2; cell value @0; @@ -1943,22 +1690,16 @@ struct Frame { } struct Frame f; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap_err(), + "" ); - let input = String::from(""); - let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] - // TODO: fix the r_panic macro that makes this error have unescaped quotes in it (weird) - // #[should_panic(expected = r#"Subfields "marker" and "temp_cells" overlap in struct."#)] - #[should_panic] fn structs_bf_1b() { - let program = String::from( - r#" + let program = r#" struct Frame { cell marker @-2; cell value @0; @@ -1966,20 +1707,16 @@ struct Frame { } struct Frame f; - "#, +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "" ); - let input = String::from(""); - let desired_output = String::from(""); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] - #[should_panic] fn structs_bf_1c() { - let program = String::from( - r#" + let program = r#" 
struct G { cell a @1; cell b @1; @@ -1991,19 +1728,16 @@ g.b = 'b'; output g.a; output g.b; - "#, +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "" ); - let input = String::from(""); - let desired_output = String::from("ab"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn structs_bf_2() { - let program = String::from( - r#" + let program = r#" struct Green { // no @0 cell cell blue @1; @@ -2015,146 +1749,122 @@ output g.blue; bf @4 { >.< } - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "55" ); - let input = String::from(""); - let desired_output = String::from("55"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_0() { - let program = String::from( - r#" + let program = r#" output '0' + sizeof(cell); - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "1" ); - let input = String::from(""); - let desired_output = String::from("1"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_0a() { - let program = String::from( - r#" + let program = r#" output '0' + sizeof(cell[5]); - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "5" ); - let input = String::from(""); - let desired_output = String::from("5"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_0b() { - let program = String::from( - r#" + let program = r#" cell a; cell b[4]; output '0' + sizeof(a); output '0' + sizeof(b); output '0' + sizeof(b[2]); - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "141" ); - let input = String::from(""); - let desired_output = String::from("141"); - let output = 
compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_1() { - let program = String::from( - r#" + let program = r#" struct Green { cell blue; } let s = sizeof(struct Green); output '0' + s; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "1" ); - let input = String::from(""); - let desired_output = String::from("1"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_1a() { - let program = String::from( - r#" + let program = r#" struct Green { cell blue; } let s = sizeof(struct Green[3]); output '0' + s; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "3" ); - let input = String::from(""); - let desired_output = String::from("3"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_1b() { - let program = String::from( - r#" + let program = r#" struct Green { cell blue; } let s = sizeof(struct Green[3][2]); output '0' + s; - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "6" ); - let input = String::from(""); - let desired_output = String::from("6"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_2() { - let program = String::from( - r#" + let program = r#" struct Green { cell blue; cell red; } struct Green g; output '0' + sizeof(g); - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "2" ); - let input = String::from(""); - let desired_output = String::from("2"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_3() { - let program = String::from( - r#" + let program = r#" struct Green { cell blue; 
cell[5] red; @@ -2165,20 +1875,17 @@ output '0' + sizeof(g) - 13; output '0' + sizeof(g[0].blue); output '0' + sizeof(g[0].red); - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "115" ); - let input = String::from(""); - let desired_output = String::from("115"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_4() { - let program = String::from( - r#" + let program = r#" struct Green { cell blue @2; } @@ -2186,20 +1893,17 @@ struct Green[3] g; output '0' + sizeof(struct Green); output '0' + sizeof(g); output '0' + sizeof(g[2].blue) - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "391" ); - let input = String::from(""); - let desired_output = String::from("391"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[ignore] #[test] fn sizeof_5() { - let program = String::from( - r#" + let program = r#" struct Blue { cell[2] blues; } @@ -2217,19 +1921,16 @@ struct Green[3] g; output '0' + sizeof(struct Green); output '0' + sizeof(g) - 17; output '0' + sizeof(g[2].blue) - "#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "23612" ); - let input = String::from(""); - let desired_output = String::from("23612"); - let output = compile_and_run::(program, input).expect(""); - println!("{output}"); - assert_eq!(desired_output, output) } #[test] fn memory_specifiers_1() -> Result<(), String> { - let program = String::from( - r#" + let program = r#" cell foo @3 = 2; { cell n = 12; @@ -2239,240 +1940,171 @@ cell foo @3 = 2; } } output foo; -"#, - ); +"#; let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); assert_eq!(code, ">>>++<<<++++++++++++[->>>++++++++++<<<][-]>>>."); - assert_eq!(output, 
"z"); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None)?, "z"); Ok(()) } #[test] fn memory_specifiers_2() -> Result<(), String> { - let program = String::from( - r#" + let program = r#" cell a @5 = 4; cell foo @0 = 2; cell b = 10; -"#, - ); +"#; let code = compile_program::(program, None)?.to_string(); println!("{code}"); - assert!(code.starts_with(">>>>>++++<<<<<++>++++++++++")); Ok(()) } #[test] fn memory_specifiers_3() -> Result<(), String> { - let program = String::from( - r#" + let program = r#" cell a @1 = 1; cell foo @0 = 2; cell b = 3; -"#, - ); +"#; let code = compile_program::(program, None)?.to_string(); println!("{code}"); - assert!(code.starts_with(">+<++>>+++")); Ok(()) } #[test] fn memory_specifiers_4() { - let program = String::from( - r#" + let program = r#" cell a @1 = 1; cell foo @1 = 2; cell b = 3; -"#, +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "Location specifier @1 conflicts with another allocation" ); - // assert_eq!( - // compile_program::(program, None).unwrap_err(), - // "Location specifier @1 conflicts with another allocation" - // ); - // TODO: fix the need for this - assert!(compile_program::(program, None) - .unwrap_err() - .contains("conflicts with another allocation")); } #[test] - fn variable_location_specifiers_1() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_1() { + let program = r#" cell a = 'h'; bf @a {.} -"#, +"#; + assert_eq!( + compile_and_run::(program, "wxy").unwrap(), + "h" ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from("wxy"); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "h"); - Ok(()) } #[test] - fn variable_location_specifiers_1a() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_1a() { + let program = r#" cell[100] _; cell a = 'h'; cell[4] b; bf @a {.} -"#, +"#; + 
assert_eq!( + compile_and_run::(program, "").unwrap(), + "h" ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "h"); - Ok(()) } #[test] fn variable_location_specifiers_2() -> Result<(), String> { - let program = String::from( - r#" + let program = r#" struct Test {cell[3] a @0; cell b;} struct Test t; input *t.a; bf @t.a { [+.>] } -"#, - ); +"#; let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from("wxy"); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); assert_eq!(code, ",>,>,<<[+.>]"); - assert_eq!(output, "xyz"); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "wxy", None)?, "xyz"); Ok(()) } #[test] fn variable_location_specifiers_2a() -> Result<(), String> { - let program = String::from( - r#" + let program = r#" struct Test {cell[3] a @0; cell b;} struct Test t; input *t.a; bf @t { [+.>] } -"#, - ); +"#; let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from("wxy"); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); assert_eq!(code, ",>,>,<<[+.>]"); - assert_eq!(output, "xyz"); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "wxy", None)?, "xyz"); Ok(()) } #[test] - fn variable_location_specifiers_3() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_3() { + let program = r#" cell[5] f @6 = "abcde"; bf @f[2] clobbers *f {.+++.} output 10; output *f; -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "cf\nabfde" ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, 
"cf\nabfde"); - Ok(()) } #[test] - fn variable_location_specifiers_3a() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_3a() { + let program = r#" cell[4] f @8 = "xyz "; bf @f {[.>]} -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "xyz " ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "xyz "); - Ok(()) } #[test] - fn variable_location_specifiers_4() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_4() { + let program = r#" fn func(cell g) { bf @g {+.-} } cell a = '5'; func(a); -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "6" ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "6"); - Ok(()) } #[test] - fn variable_location_specifiers_4a() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_4a() { + let program = r#" fn func(cell g) { bf @g {+.-} } cell[3] a = "456"; func(a[1]); -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "6" ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "6"); - Ok(()) } #[test] - fn variable_location_specifiers_4b() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_4b() { + let program = r#" fn func(cell g) { bf @g {+.-} } @@ -2483,22 +2115,16 @@ a.r[0] = '4'; a.r[1] = '5'; a.r[2] = '6'; func(a.r[1]); -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + 
"6" ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "6"); - Ok(()) } #[test] - fn variable_location_specifiers_4c() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_4c() { + let program = r#" fn func(struct H h) { bf @h {+.-} } @@ -2509,22 +2135,16 @@ a.r[0] = '4'; a.r[1] = '5'; a.r[2] = '6'; func(a); -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "5" ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "5"); - Ok(()) } #[test] - fn variable_location_specifiers_4d() -> Result<(), String> { - let program = String::from( - r#" + fn variable_location_specifiers_4d() { + let program = r#" fn func(cell[2] g) { bf @g {+.-} } @@ -2535,105 +2155,89 @@ struct H a; a.jj.j[0] = '3'; a.jj.j[1] = '4'; func(a.jj.j); -"#, +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "4" ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - let input = String::from(""); - let output = run_code(BVM_CONFIG_1D, code.clone(), input, None); - println!("{output}"); - assert_eq!(output, "4"); - Ok(()) } #[test] fn assertions_1() -> Result<(), String> { - let program = String::from( - r#" + let program = r#" cell a @0 = 5; output a; assert a equals 2; a = 0; output a; -"#, - ); +"#; let code = compile_program::(program, Some(OPT_ALL))?.to_string(); println!("{code}"); - assert!(code.starts_with("+++++.--.")); Ok(()) } #[test] fn assertions_2() -> Result<(), String> { - let program = String::from( - r#" + let program = r#" cell a @0 = 2; output a; assert a unknown; a = 0; output a; -"#, - ); +"#; let code = compile_program::(program, 
Some(OPT_ALL))?.to_string(); println!("{code}"); - assert!(code.starts_with("++.[-].")); Ok(()) } #[test] - fn inline_brainfuck_1() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_1() { + let program = r#" bf { ,.[-] +[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+. } -"#, - ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - +"#; + let code = compile_program::(program, None) + .unwrap() + .to_string(); assert_eq!( code, ",.[-]+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." ); - - let output = run_code(BVM_CONFIG_1D, code, String::from("~"), None); - assert_eq!(output, "~Hello, World!"); - Ok(()) + assert_eq!( + run_code(BVM_CONFIG_1D, &code, "~", None).unwrap(), + "~Hello, World!" + ); } #[test] fn inline_brainfuck_2() -> Result<(), String> { - let program = String::from( - r#" + let program = r#" // cell a @0; // cell b @1; bf @3 { ,.[-] +[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+. } -"#, - ); +"#; let code = compile_program::(program, None)?.to_string(); println!("{code}"); - assert!(code.starts_with( ">>>,.[-]+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." )); - - let output = run_code(BVM_CONFIG_1D, code, String::from("~"), None); - assert_eq!(output, "~Hello, World!"); + assert_eq!( + run_code(BVM_CONFIG_1D, &code, "~", None).unwrap(), + "~Hello, World!" 
+ ); Ok(()) } #[test] - fn inline_brainfuck_3() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_3() { + let program = r#" cell[3] str @0; bf @0 clobbers *str { @@ -2651,22 +2255,18 @@ bf @0 clobbers *str { <<< } assert *str equals 0; -"#, - ); - let code = compile_program::(program, None)?.to_string(); +"#; + let code = compile_program::(program, None) + .unwrap() + .to_string(); println!("{code}"); - assert!(code.starts_with(",>,>,<<[+>]<<<[.[-]>]<<<")); - - let output = run_code(BVM_CONFIG_1D, code, String::from("HEY"), None); - assert_eq!(output, "IFZ"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "HEY", None).unwrap(), "IFZ"); } #[test] - fn inline_brainfuck_4() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_4() { + let program = r#" bf { // enters a line of user input // runs some embedded mastermind for each character @@ -2683,20 +2283,22 @@ bf { ,---------- ] } -"#, - ); - let code = compile_program::(program, None)?.to_string(); +"#; + let code = compile_program::(program, None) + .unwrap() + .to_string(); println!("{code}"); - - let output = run_code(BVM_CONFIG_1D, code, String::from("line of input\n"), None); - assert_eq!(output, "lmijnoef !opfg !ijnopquvtu"); - Ok(()) + assert!(code.starts_with(".----------[++++++++++")); + assert!(code.ends_with("[-],----------]")); + assert_eq!( + run_code(BVM_CONFIG_1D, &code, "line of input\n", None).unwrap(), + "lmijnoef !opfg !ijnopquvtu" + ); } #[test] - fn inline_brainfuck_5() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_5() { + let program = r#" // external function within the same file, could be tricky to implement fn quote(cell n) { // H 'H' @@ -2721,20 +2323,22 @@ bf { ,---------- ] } -"#, - ); - let code = compile_program::(program, None)?.to_string(); +"#; + let code = compile_program::(program, None) + .unwrap() + .to_string(); println!("{code}"); - - let output = run_code(BVM_CONFIG_1D, code, 
String::from("hello\n"), None); - assert_eq!(output, "'h'\n'e'\n'l'\n'l'\n'o'\n"); - Ok(()) + assert!(code.starts_with(".----------[++++++++++")); + assert!(code.ends_with("[-],----------]")); + assert_eq!( + run_code(BVM_CONFIG_1D, &code, "hello\n", None).unwrap(), + "'h'\n'e'\n'l'\n'l'\n'o'\n" + ); } #[test] - fn inline_brainfuck_6() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_6() { + let program = r#" cell b = 4; bf { @@ -2744,96 +2348,90 @@ bf { } ++-- } -"#, +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + "" ); - let result = compile_program::(program, None); - assert!(result.is_err()); - - Ok(()) } #[test] - fn inline_brainfuck_7() -> Result<(), String> { - let program = String::from( - r#" + fn inline_brainfuck_7() { + let program = r#" bf { ,>,>, << {{{{{{cell g @5 = 1;}}}}}} } - "#, +"#; + assert_eq!( + compile_program::(program, None) + .unwrap() + .to_string(), + ",>,>,<<>>>>>+[-]<<<<<" ); - let code = compile_program::(program, None)?.to_string(); - println!("{code}"); - - assert_eq!(code, ",>,>,<<>>>>>+[-]<<<<<"); - Ok(()) } - #[test] - fn inline_2d_brainfuck() -> Result<(), String> { - let program = String::from( - r#" - bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.} - "#, - ); - let code = compile_program::(program, None)?.to_string(); + #[test] + fn inline_2d_brainfuck() { + let program = r#" +bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.} +"#; + let code = compile_program::(program, None) + .unwrap() + .to_string(); assert_eq!( code, ",.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+." ); - - let output = run_code(BVM_CONFIG_2D, code, String::from("~"), None); - assert_eq!(output, "~Hello, World!"); - Ok(()) + assert_eq!( + run_code(BVM_CONFIG_2D, &code, "~", None).unwrap(), + "~Hello, World!" 
+ ); } + #[test] - #[should_panic(expected = "Invalid Inline Brainfuck Characters in vvstvv")] fn invalid_inline_2d_brainfuck() { - let program = String::from( - r#" - bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvstvv.+++.------.vv-.^^^^+.} - "#, + let program = r#" +bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvstvv.+++.------.vv-.^^^^+.} +"#; + assert_eq!( + compile_program::(program, None).unwrap_err(), + // TODO: make sure this works correctly after refactoring tokeniser + "Invalid Inline Brainfuck Characters in vvstvv" ); - let _result = compile_program::(program, None); } #[test] - #[should_panic(expected = "2D Brainfuck currently disabled")] fn inline_2d_brainfuck_disabled() { - run_code( - BVM_CONFIG_1D, - String::from( + assert_eq!( + run_code( + BVM_CONFIG_1D, ",.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.", - ), - String::from("~"), - None, + "~", + None, + ) + .unwrap(), + "~Hello, World!" ); } + #[test] - fn constant_optimisations_1() -> Result<(), String> { - let program = String::from( - " + fn constant_optimisations_1() { + let program = r#" output 'h'; - ", - ); - let input = String::from(""); - let desired_output = String::from("h"); - - let code = compile_program::(program, Some(OPT_ALL))?; - println!("{}", code.clone().to_string()); - assert_eq!( - desired_output, - run_code(BVM_CONFIG_1D, code.to_string(), input, None) - ); - - Ok(()) +"#; + let code = compile_program::(program, Some(OPT_ALL)) + .unwrap() + .to_string(); + println!("{code}"); + assert!(code.len() < 30); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "h"); } #[test] - fn constant_optimisations_2() -> Result<(), String> { - let program = String::from( - r#" + fn constant_optimisations_2() { + let program = r#" cell[15] arr @1; cell a = 'G'; cell b = a + 45; @@ -2841,28 +2439,22 @@ output b; b -= 43; output b; output a + 3; - "#, - ); - let input = String::from(""); - let desired_output = String::from("tIJ"); 
- - let code = compile_program::(program, Some(OPT_ALL))?.to_string(); - println!("{}", code); - assert_eq!(desired_output, run_code(BVM_CONFIG_1D, code, input, None)); - - Ok(()) +"#; + let code = compile_program::(program, Some(OPT_ALL)) + .unwrap() + .to_string(); + println!("{code}"); + assert!(code.len() < 100); + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "tIJ"); } // TODO: remove the need for this #[test] - #[should_panic(expected = "Memory Allocation Method not implemented")] fn unimplemented_memory_allocation() { - let program = String::from( - r#" + let program = r#" cell[15] arr @1; cell a = 'G'; - "#, - ); +"#; let cfg = MastermindConfig { optimise_generated_code: false, optimise_generated_all_permutations: false, @@ -2875,53 +2467,52 @@ output a + 3; memory_allocation_method: 128, enable_2d_grid: false, }; - let _code = compile_program::(program, Some(cfg)); + assert_eq!( + compile_program::(program, Some(cfg)).unwrap_err(), + "Memory Allocation Method not implemented" + ); } #[test] - fn memory_specifiers_2d_1() -> Result<(), String> { - let program = String::from( - r#" + fn memory_specifiers_2d_1() { + let program = r#" cell a @(1, 2) = 1; cell foo @0 = 2; cell b = 3; -"#, - ); +"#; assert_eq!( - compile_program::(program, None)?.to_string(), + compile_program::(program, None) + .unwrap() + .to_string(), ">^^++++" ); - Ok(()) } #[test] - fn memory_specifiers_2d_2() -> Result<(), String> { - let program = String::from( - r#" + fn memory_specifiers_2d_2() { + let program = r#" cell[4][3] g @(1, 2); g[0][0] = 1; g[1][1] = 2; g[2][2] = 3; cell foo @0 = 2; cell b = 3; -"#, - ); +"#; assert_eq!( - compile_program::(program, None)?.to_string(), + compile_program::(program, None) + .unwrap() + .to_string(), ">^^[-]+>>>>>[-]++>>>>>[-]+++<<<<<<<<<<+++" ); - Ok(()) } #[test] fn memory_specifiers_2d_3() { - let program = String::from( - r#" + let program = r#" cell a @(1, 3) = 1; cell foo @(1, 3) = 2; cell b = 3; -"#, - ); +"#; assert_eq!( 
compile_program::(program, None).unwrap_err(), "Location specifier @(1, 3) conflicts with another allocation" @@ -2930,13 +2521,11 @@ cell b = 3; #[test] fn memory_specifiers_2d_4() { - let program = String::from( - r#" + let program = r#" cell a @2 = 1; cell foo @(2, 0) = 2; cell b = 3; -"#, - ); +"#; assert_eq!( compile_program::(program, None).unwrap_err(), "Location specifier @(2, 0) conflicts with another allocation" @@ -2945,12 +2534,10 @@ cell b = 3; #[test] fn memory_specifiers_2d_5() { - let program = String::from( - r#" + let program = r#" cell a @(2, 4) = 1; cell[4] b @(0, 4); -"#, - ); +"#; assert_eq!( compile_program::(program, None).unwrap_err(), "Location specifier @(0, 4) conflicts with another allocation" @@ -2958,9 +2545,8 @@ cell[4] b @(0, 4); } #[test] - fn tiles_memory_allocation_1() -> Result<(), String> { - let program = String::from( - r#" + fn tiles_memory_allocation_1() { + let program = r#" cell a = 1; cell b = 1; cell c = 1; @@ -2970,19 +2556,17 @@ cell f = 1; cell h = 1; cell i = 1; cell j = 1; - "#, +"#; + assert_eq!( + compile_program::(program, Some(OPT_NONE_TILES)) + .unwrap() + .to_string(), + "+vv+^^+>vv+^+^+" ); - let desired_output = String::from("+vv+^^+>vv+^+^+"); - - let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); - assert_eq!(desired_output, code); - - Ok(()) } #[test] - fn tiles_memory_allocation_2() -> Result<(), String> { - let program = String::from( - r#" + fn tiles_memory_allocation_2() { + let program = r#" cell a = '1'; cell b = '2'; cell c = '3'; @@ -3001,38 +2585,33 @@ output f; output g; output h; output i; - "#, +"#; + let code = compile_program::(program, Some(OPT_NONE_TILES)) + .unwrap() + .to_string(); + println!("{code}"); + assert!(false); // TODO: add some test here for memory allocation + assert_eq!( + run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), + "123456789" ); - let input = String::from(""); - let desired_output = String::from("123456789"); - - let code = 
compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); - println!("{}", code); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - - Ok(()) } - // TODO: decipher this #[test] - #[ignore] fn tiles_memory_allocation_3() { - let program = String::from( - r#" + let program = r#" cell a @(2, 4) = 1; cell[4] b @(0, 4); -"#, - ); +"#; assert_eq!( - compile_program::(program, Some(OPT_NONE_TILES)).unwrap_err(), + compile_program::(program, Some(OPT_NONE_TILES)).unwrap_err(), "Location specifier @(0, 4) conflicts with another allocation" ); } #[test] - fn tiles_memory_allocation_4() -> Result<(), String> { - let program = String::from( - r#" + fn tiles_memory_allocation_4() { + let program = r#" cell a @2 = 1; cell[4] b; a = '5'; @@ -3045,20 +2624,18 @@ output b[1]; output b[2]; output b[3]; output a; -"#, - ); - let code = compile_program::(program, Some(OPT_NONE_TILES))?.to_string(); - println!("{}", code); - let input = String::from(""); - let desired_output = String::from("12345"); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - Ok(()) +"#; + let code = compile_program::(program, Some(OPT_NONE_TILES)) + .unwrap() + .to_string(); + println!("{code}"); + assert!(false); // TODO: add some test here for memory allocation + assert_eq!(run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "12345"); } #[test] - fn zig_zag_memory_allocation_1() -> Result<(), String> { - let program = String::from( - r#" + fn zig_zag_memory_allocation_1() { + let program = r#" cell a = 1; cell b = 1; cell c = 1; @@ -3068,20 +2645,18 @@ cell f = 1; cell h = 1; cell i = 1; cell j = 1; - "#, +"#; + assert_eq!( + compile_program::(program, Some(OPT_NONE_ZIG_ZAG)) + .unwrap() + .to_string(), + "+>+<^+>>v+<^+<^+>>>vv+<^+<^+" ); - let desired_output = String::from("+>+<^+>>v+<^+<^+>>>vv+<^+<^+"); - - let code = - compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); - assert_eq!(desired_output, code); - - Ok(()) } + #[test] - fn 
zig_zag_memory_allocation_2() -> Result<(), String> { - let program = String::from( - r#" + fn zig_zag_memory_allocation_2() { + let program = r#" cell a = '1'; cell b = '2'; cell c = '3'; @@ -3100,39 +2675,34 @@ output f; output g; output h; output i; - "#, - ); - let input = String::from(""); - let desired_output = String::from("123456789"); - - let code = - compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); - println!("{}", code); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); +"#; - Ok(()) + let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG)) + .unwrap() + .to_string(); + println!("{code}",); + assert!(false); // TODO: add some test here for memory allocation + assert_eq!( + run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), + "123456789" + ); } - // TODO: decipher this #[test] - #[ignore] fn zig_zag_memory_allocation_3() { - let program = String::from( - r#" + let program = r#" cell a @(2, 4) = 1; cell[4] b @(0, 4); -"#, - ); +"#; assert_eq!( - compile_program::(program, Some(OPT_NONE_ZIG_ZAG)).unwrap_err(), + compile_program::(program, Some(OPT_NONE_ZIG_ZAG)).unwrap_err(), "Location specifier @(0, 4) conflicts with another allocation" ); } #[test] - fn zig_zag_memory_allocation_4() -> Result<(), String> { - let program = String::from( - r#" + fn zig_zag_memory_allocation_4() { + let program = r#" cell a @2 = 1; cell[4] b; a = '5'; @@ -3145,21 +2715,18 @@ output b[1]; output b[2]; output b[3]; output a; -"#, - ); - let code = - compile_program::(program, Some(OPT_NONE_ZIG_ZAG))?.to_string(); - println!("{}", code); - let input = String::from(""); - let desired_output = String::from("12345"); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - Ok(()) +"#; + let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG)) + .unwrap() + .to_string(); + println!("{code}"); + assert!(false); // TODO: add some test here for memory allocation + assert_eq!(run_code(BVM_CONFIG_2D, &code, "", 
None).unwrap(), "12345"); } #[test] - fn spiral_memory_allocation_1() -> Result<(), String> { - let program = String::from( - r#" + fn spiral_memory_allocation_1() { + let program = r#" cell a = 1; cell b = 1; cell c = 1; @@ -3169,21 +2736,17 @@ cell f = 1; cell h = 1; cell i = 1; cell j = 1; - "#, +"#; + assert_eq!( + compile_program::(program, Some(OPT_NONE_SPIRAL)) + .unwrap() + .to_string(), + "^+>+v+<+<+^+^+>+>+" ); - let desired_output = String::from("^+>+v+<+<+^+^+>+>+"); - - // TODO: fix this, this should fail in its current state - let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); - println!("{code}"); - assert_eq!(desired_output, code); - - Ok(()) } #[test] - fn spiral_memory_allocation_2() -> Result<(), String> { - let program = String::from( - r#" + fn spiral_memory_allocation_2() { + let program = r#" cell a = '1'; cell b = '2'; cell c = '3'; @@ -3202,28 +2765,25 @@ output f; output g; output h; output i; - "#, +"#; + let code = compile_program::(program, Some(OPT_NONE_SPIRAL)) + .unwrap() + .to_string(); + println!("{code}"); + assert!(false); // TODO: add some test here for memory allocation + assert_eq!( + run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), + "123456789" ); - let input = String::from(""); - let desired_output = String::from("123456789"); - - let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); - println!("{}", code); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - - Ok(()) } // TODO: decipher this #[test] - #[ignore] fn spiral_memory_allocation_3() { - let program = String::from( - r#" + let program = r#" cell a @(2, 4) = 1; cell[4] b @(0, 4); -"#, - ); +"#; assert_eq!( compile_program::(program, Some(OPT_NONE_SPIRAL)).unwrap_err(), "Location specifier @(0,4) conflicts with another allocation" @@ -3231,9 +2791,8 @@ cell[4] b @(0, 4); } #[test] - fn spiral_memory_allocation_4() -> Result<(), String> { - let program = String::from( - r#" + fn 
spiral_memory_allocation_4() { + let program = r#" cell a @2 = 1; cell[4] b; a = '5'; @@ -3246,13 +2805,12 @@ output b[1]; output b[2]; output b[3]; output a; -"#, - ); - let code = compile_program::(program, Some(OPT_NONE_SPIRAL))?.to_string(); - println!("{}", code); - let input = String::from(""); - let desired_output = String::from("12345"); - assert_eq!(desired_output, run_code(BVM_CONFIG_2D, code, input, None)); - Ok(()) +"#; + let code = compile_program::(program, Some(OPT_NONE_SPIRAL)) + .unwrap() + .to_string(); + println!("{code}"); + assert!(false); // TODO: add some test here for memory allocation + assert_eq!(run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "12345"); } } diff --git a/compiler/src/tokeniser.rs b/compiler/src/tokeniser.rs index 3eed4fc..dae5c3a 100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/tokeniser.rs @@ -6,7 +6,7 @@ use regex_lite::Regex; // TODO: refactor: combine tokeniser and parser into one // make the inline brainfuck tokens contextual -pub fn tokenise(source: &String) -> Result, String> { +pub fn tokenise(source: &str) -> Result, String> { let stripped = source .lines() .map(strip_line) From 07f1ce04b08602f4b93c390444539e15748a6269 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Sun, 26 Oct 2025 19:49:43 +1100 Subject: [PATCH 21/56] Fix variable scope bug and tweak tests --- compiler/src/brainfuck.rs | 1 - compiler/src/frontend.rs | 32 +++--- compiler/src/parser.rs | 5 +- compiler/src/tests.rs | 201 +++++++++++++++----------------------- 4 files changed, 99 insertions(+), 140 deletions(-) diff --git a/compiler/src/brainfuck.rs b/compiler/src/brainfuck.rs index 86fac90..ed8bf6f 100644 --- a/compiler/src/brainfuck.rs +++ b/compiler/src/brainfuck.rs @@ -398,7 +398,6 @@ jgZK;;k[<=]lh^L=>=KgkJ==J^gklh_K>>m`?@bnicL@A@KhmJ@@JchmnidKAA\n" } #[test] - #[should_panic(expected = "2D Brainfuck currently disabled")] fn grid_disabled_2() { assert_eq!( run_code( diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs 
index fb78460..43d1342 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -687,7 +687,7 @@ impl Memory { pub struct ScopeBuilder<'a, TC, OC> { /// a reference to the parent scope, for accessing things defined outside of this scope outer_scope: Option<&'a ScopeBuilder<'a, TC, OC>>, - /// fn_only: true if syntactic context instead of normal context. + /// If true, scope is not able to access variables from outer scope. /// Used for embedded mm so that the inner mm can use outer functions but not variables. types_only: bool, @@ -1130,15 +1130,15 @@ where // get the absolute type of the variable, as well as the memory allocations let (full_type, memory) = self.get_base_variable_memory(&target.name)?; // get the correct index within the memory and return - Ok(match (&target.subfields, full_type, memory) { - (None, ValueType::Cell, Memory::Cell { id }) => CellReference { + match (&target.subfields, full_type, memory) { + (None, ValueType::Cell, Memory::Cell { id }) => Ok(CellReference { memory_id: *id, index: None, - }, - (None, ValueType::Cell, Memory::MappedCell { id, index }) => CellReference { + }), + (None, ValueType::Cell, Memory::MappedCell { id, index }) => Ok(CellReference { memory_id: *id, index: *index, - }, + }), ( Some(subfield_chain), ValueType::Array(_, _) | ValueType::DictStruct(_), @@ -1154,7 +1154,7 @@ where r_panic!("Expected cell type in variable target: {target}"); }; r_assert!(cell_index < *len, "Cell reference out of bounds on variable target: {target}. This should not occur."); - CellReference { + Ok(CellReference { memory_id: *id, index: Some(match memory { Memory::Cells { id: _, len: _ } => cell_index, @@ -1165,7 +1165,7 @@ where } => *start_index + cell_index, _ => unreachable!(), }), - } + }) } // valid states, user error ( @@ -1201,7 +1201,7 @@ where ) => r_panic!( "Invalid memory for value type in target: {target}. This should not occur." 
), - }) + } } /// Return a list of cell references for an array of cells (not an array of structs) @@ -1363,10 +1363,16 @@ where /// Return the absolute type and memory allocation for a variable name fn get_base_variable_memory(&self, var_name: &str) -> Result<(&ValueType, &Memory), String> { - match (self.outer_scope, self.variable_memory.get(var_name)) { - (_, Some((value_type, memory))) => Ok((value_type, memory)), - (Some(outer_scope), None) => outer_scope.get_base_variable_memory(var_name), - (None, None) => r_panic!("No variable found with name \"{var_name}\"."), + match ( + self.outer_scope, + self.types_only, + self.variable_memory.get(var_name), + ) { + (_, _, Some((value_type, memory))) => Ok((value_type, memory)), + (Some(outer_scope), false, None) => outer_scope.get_base_variable_memory(var_name), + (None, _, None) | (Some(_), true, None) => { + r_panic!("No variable found in scope with name \"{var_name}\".") + } } } diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index 5871ef4..0bc6ef1 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -1741,9 +1741,10 @@ mod parser_tests { ])) } + // TODO: make context-based parser for brainfuck and refactor these tests #[test] fn inline_bf_2() { - assert!(parse::(&[ + assert!(parse::(&[ Token::Cell, Token::Name(String::from("v")), Token::Semicolon, @@ -1788,7 +1789,7 @@ mod parser_tests { #[test] fn inline_bf_3() { - assert!(parse::(&[ + assert!(parse::(&[ Token::Bf, Token::OpenBrace, Token::Name(String::from("vvvv")), diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index ed17789..bc06416 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -44,7 +44,7 @@ pub mod black_box_tests { enable_2d_grid: false, }; - const OPT_NONE_TILES: MastermindConfig = MastermindConfig { + const OPT_NONE_2D_TILES: MastermindConfig = MastermindConfig { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, @@ -54,10 +54,10 @@ pub mod 
black_box_tests { optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 3, - enable_2d_grid: false, + enable_2d_grid: true, }; - const OPT_NONE_SPIRAL: MastermindConfig = MastermindConfig { + const OPT_NONE_2D_SPIRAL: MastermindConfig = MastermindConfig { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, @@ -67,10 +67,10 @@ pub mod black_box_tests { optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 2, - enable_2d_grid: false, + enable_2d_grid: true, }; - const OPT_NONE_ZIG_ZAG: MastermindConfig = MastermindConfig { + const OPT_NONE_2D_ZIG_ZAG: MastermindConfig = MastermindConfig { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, @@ -80,7 +80,7 @@ pub mod black_box_tests { optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 1, - enable_2d_grid: false, + enable_2d_grid: true, }; const BVM_CONFIG_1D: BrainfuckConfig = BrainfuckConfig { @@ -118,10 +118,11 @@ pub mod black_box_tests { fn compile_program<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( program: &str, config: Option, - ) -> Result, String> + ) -> Result where BrainfuckBuilderData: BrainfuckBuilder, CellAllocatorData: CellAllocator, + Vec: BrainfuckProgram, { let ctx = MastermindContext { config: config.unwrap_or(OPT_NONE), @@ -131,7 +132,7 @@ pub mod black_box_tests { let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; - Ok(bf_code) + Ok(bf_code.to_string()) } #[test] @@ -475,32 +476,30 @@ output x + 'f'; } #[test] - fn assignments_9() -> Result<(), String> { + fn assignments_9() { let program = r#" cell x = 128; x += 128; output x + 'f'; "#; - let code = compile_program::(program, Some(OPT_ALL))?.to_string(); + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); println!("{code}"); assert!(code.len() < 
200); - assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None)?, "f"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "f"); } #[test] - fn assignments_9a() -> Result<(), String> { + fn assignments_9a() { let program = r#" cell x = 126; x += 2; x += 128; output x + 'f'; "#; - let code = compile_program::(program, Some(OPT_ALL))?.to_string(); + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); println!("{code}"); assert!(code.len() < 200); - assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None)?, "f"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "f"); } #[test] @@ -728,7 +727,7 @@ output 10; } #[test] - fn functions_2() -> Result<(), String> { + fn functions_2() { let program = r#" cell global_var = '0'; @@ -754,12 +753,9 @@ output global_var; output 10; "#; - let code = compile_program::(program, None) - .unwrap() - .to_string(); + let code = compile_program::(program, None).unwrap(); println!("{code}"); - assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None)?, "01231\n"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "01231\n"); } #[test] @@ -1693,7 +1689,7 @@ struct Frame f; "#; assert_eq!( compile_and_run::(program, "").unwrap_err(), - "" + "Subfields \"marker\" and \"temp_cells\" overlap in struct." ); } @@ -1710,7 +1706,7 @@ struct Frame f; "#; assert_eq!( compile_program::(program, None).unwrap_err(), - "" + "Cannot create struct field \"cell marker @-2\". Expected non-negative cell offset." ); } @@ -1731,7 +1727,7 @@ output g.b; "#; assert_eq!( compile_program::(program, None).unwrap_err(), - "" + "Subfields \"a\" and \"b\" overlap in struct." 
); } @@ -1929,7 +1925,7 @@ output '0' + sizeof(g[2].blue) } #[test] - fn memory_specifiers_1() -> Result<(), String> { + fn memory_specifiers_1() { let program = r#" cell foo @3 = 2; { @@ -1941,36 +1937,33 @@ cell foo @3 = 2; } output foo; "#; - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None).unwrap(); assert_eq!(code, ">>>++<<<++++++++++++[->>>++++++++++<<<][-]>>>."); - assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None)?, "z"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "z"); } #[test] - fn memory_specifiers_2() -> Result<(), String> { + fn memory_specifiers_2() { let program = r#" cell a @5 = 4; cell foo @0 = 2; cell b = 10; "#; - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None).unwrap(); println!("{code}"); assert!(code.starts_with(">>>>>++++<<<<<++>++++++++++")); - Ok(()) } #[test] - fn memory_specifiers_3() -> Result<(), String> { + fn memory_specifiers_3() { let program = r#" cell a @1 = 1; cell foo @0 = 2; cell b = 3; "#; - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None).unwrap(); println!("{code}"); assert!(code.starts_with(">+<++>>+++")); - Ok(()) } #[test] @@ -2013,7 +2006,7 @@ bf @a {.} } #[test] - fn variable_location_specifiers_2() -> Result<(), String> { + fn variable_location_specifiers_2() { let program = r#" struct Test {cell[3] a @0; cell b;} struct Test t; @@ -2022,14 +2015,13 @@ bf @t.a { [+.>] } "#; - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None).unwrap(); assert_eq!(code, ",>,>,<<[+.>]"); - assert_eq!(run_code(BVM_CONFIG_1D, &code, "wxy", None)?, "xyz"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "wxy", None).unwrap(), "xyz"); } #[test] - fn variable_location_specifiers_2a() -> Result<(), String> { + fn variable_location_specifiers_2a() { let program = r#" struct Test {cell[3] a @0; 
cell b;} struct Test t; @@ -2038,10 +2030,9 @@ bf @t { [+.>] } "#; - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None).unwrap(); assert_eq!(code, ",>,>,<<[+.>]"); - assert_eq!(run_code(BVM_CONFIG_1D, &code, "wxy", None)?, "xyz"); - Ok(()) + assert_eq!(run_code(BVM_CONFIG_1D, &code, "wxy", None).unwrap(), "xyz"); } #[test] @@ -2163,7 +2154,7 @@ func(a.jj.j); } #[test] - fn assertions_1() -> Result<(), String> { + fn assertions_1() { let program = r#" cell a @0 = 5; output a; @@ -2171,14 +2162,13 @@ assert a equals 2; a = 0; output a; "#; - let code = compile_program::(program, Some(OPT_ALL))?.to_string(); + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); println!("{code}"); assert!(code.starts_with("+++++.--.")); - Ok(()) } #[test] - fn assertions_2() -> Result<(), String> { + fn assertions_2() { let program = r#" cell a @0 = 2; output a; @@ -2186,10 +2176,9 @@ assert a unknown; a = 0; output a; "#; - let code = compile_program::(program, Some(OPT_ALL))?.to_string(); + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); println!("{code}"); assert!(code.starts_with("++.[-].")); - Ok(()) } #[test] @@ -2200,9 +2189,7 @@ bf { +[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+. } "#; - let code = compile_program::(program, None) - .unwrap() - .to_string(); + let code = compile_program::(program, None).unwrap(); assert_eq!( code, ",.[-]+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." @@ -2214,7 +2201,7 @@ bf { } #[test] - fn inline_brainfuck_2() -> Result<(), String> { + fn inline_brainfuck_2() { let program = r#" // cell a @0; // cell b @1; @@ -2223,7 +2210,7 @@ bf @3 { +[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+. 
} "#; - let code = compile_program::(program, None)?.to_string(); + let code = compile_program::(program, None).unwrap(); println!("{code}"); assert!(code.starts_with( ">>>,.[-]+[-->-[>>+>-----<<]<--<---]>-.>>>+.>>..+++[.>]<<<<.+++.------.<<-.>>>>+." @@ -2232,7 +2219,6 @@ bf @3 { run_code(BVM_CONFIG_1D, &code, "~", None).unwrap(), "~Hello, World!" ); - Ok(()) } #[test] @@ -2256,9 +2242,7 @@ bf @0 clobbers *str { } assert *str equals 0; "#; - let code = compile_program::(program, None) - .unwrap() - .to_string(); + let code = compile_program::(program, None).unwrap(); println!("{code}"); assert!(code.starts_with(",>,>,<<[+>]<<<[.[-]>]<<<")); assert_eq!(run_code(BVM_CONFIG_1D, &code, "HEY", None).unwrap(), "IFZ"); @@ -2284,11 +2268,9 @@ bf { ] } "#; - let code = compile_program::(program, None) - .unwrap() - .to_string(); + let code = compile_program::(program, None).unwrap(); println!("{code}"); - assert!(code.starts_with(".----------[++++++++++")); + assert!(code.starts_with(",----------[++++++++++")); assert!(code.ends_with("[-],----------]")); assert_eq!( run_code(BVM_CONFIG_1D, &code, "line of input\n", None).unwrap(), @@ -2324,11 +2306,9 @@ bf { ] } "#; - let code = compile_program::(program, None) - .unwrap() - .to_string(); + let code = compile_program::(program, None).unwrap(); println!("{code}"); - assert!(code.starts_with(".----------[++++++++++")); + assert!(code.starts_with(",----------[++++++++++")); assert!(code.ends_with("[-],----------]")); assert_eq!( run_code(BVM_CONFIG_1D, &code, "hello\n", None).unwrap(), @@ -2351,7 +2331,7 @@ bf { "#; assert_eq!( compile_program::(program, None).unwrap_err(), - "" + "No variable found in scope with name \"b\"." 
); } @@ -2365,9 +2345,7 @@ bf { } "#; assert_eq!( - compile_program::(program, None) - .unwrap() - .to_string(), + compile_program::(program, None).unwrap(), ",>,>,<<>>>>>+[-]<<<<<" ); } @@ -2377,9 +2355,7 @@ bf { let program = r#" bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+.} "#; - let code = compile_program::(program, None) - .unwrap() - .to_string(); + let code = compile_program::(program, None).unwrap(); assert_eq!( code, ",.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvvv.+++.------.vv-.^^^^+." @@ -2398,7 +2374,7 @@ bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvstvv.+++.------.vv-.^^^ assert_eq!( compile_program::(program, None).unwrap_err(), // TODO: make sure this works correctly after refactoring tokeniser - "Invalid Inline Brainfuck Characters in vvstvv" + "" ); } @@ -2421,9 +2397,7 @@ bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvstvv.+++.------.vv-.^^^ let program = r#" output 'h'; "#; - let code = compile_program::(program, Some(OPT_ALL)) - .unwrap() - .to_string(); + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); println!("{code}"); assert!(code.len() < 30); assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "h"); @@ -2440,9 +2414,7 @@ b -= 43; output b; output a + 3; "#; - let code = compile_program::(program, Some(OPT_ALL)) - .unwrap() - .to_string(); + let code = compile_program::(program, Some(OPT_ALL)).unwrap(); println!("{code}"); assert!(code.len() < 100); assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "tIJ"); @@ -2481,9 +2453,7 @@ cell foo @0 = 2; cell b = 3; "#; assert_eq!( - compile_program::(program, None) - .unwrap() - .to_string(), + compile_program::(program, None).unwrap(), ">^^++++" ); } @@ -2499,9 +2469,7 @@ cell foo @0 = 2; cell b = 3; "#; assert_eq!( - compile_program::(program, None) - .unwrap() - .to_string(), + compile_program::(program, None).unwrap(), ">^^[-]+>>>>>[-]++>>>>>[-]+++<<<<<<<<<<+++" ); } @@ -2558,9 +2526,7 @@ 
cell i = 1; cell j = 1; "#; assert_eq!( - compile_program::(program, Some(OPT_NONE_TILES)) - .unwrap() - .to_string(), + compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap(), "+vv+^^+>vv+^+^+" ); } @@ -2586,11 +2552,9 @@ output g; output h; output i; "#; - let code = compile_program::(program, Some(OPT_NONE_TILES)) - .unwrap() - .to_string(); + let code = compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap(); println!("{code}"); - assert!(false); // TODO: add some test here for memory allocation + assert!(code.contains("v") || code.contains("^")); assert_eq!( run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "123456789" @@ -2604,7 +2568,7 @@ cell a @(2, 4) = 1; cell[4] b @(0, 4); "#; assert_eq!( - compile_program::(program, Some(OPT_NONE_TILES)).unwrap_err(), + compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap_err(), "Location specifier @(0, 4) conflicts with another allocation" ); } @@ -2625,11 +2589,10 @@ output b[2]; output b[3]; output a; "#; - let code = compile_program::(program, Some(OPT_NONE_TILES)) - .unwrap() - .to_string(); + let code = + compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap(); println!("{code}"); - assert!(false); // TODO: add some test here for memory allocation + assert!(code.contains("v") || code.contains("^")); assert_eq!(run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "12345"); } @@ -2647,9 +2610,7 @@ cell i = 1; cell j = 1; "#; assert_eq!( - compile_program::(program, Some(OPT_NONE_ZIG_ZAG)) - .unwrap() - .to_string(), + compile_program::(program, Some(OPT_NONE_2D_ZIG_ZAG)).unwrap(), "+>+<^+>>v+<^+<^+>>>vv+<^+<^+" ); } @@ -2676,12 +2637,10 @@ output g; output h; output i; "#; - - let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG)) - .unwrap() - .to_string(); - println!("{code}",); - assert!(false); // TODO: add some test here for memory allocation + let code = + compile_program::(program, Some(OPT_NONE_2D_ZIG_ZAG)).unwrap(); + println!("{code}"); + assert!(code.contains("v") || 
code.contains("^")); assert_eq!( run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "123456789" @@ -2695,7 +2654,8 @@ cell a @(2, 4) = 1; cell[4] b @(0, 4); "#; assert_eq!( - compile_program::(program, Some(OPT_NONE_ZIG_ZAG)).unwrap_err(), + compile_program::(program, Some(OPT_NONE_2D_ZIG_ZAG)) + .unwrap_err(), "Location specifier @(0, 4) conflicts with another allocation" ); } @@ -2716,11 +2676,10 @@ output b[2]; output b[3]; output a; "#; - let code = compile_program::(program, Some(OPT_NONE_ZIG_ZAG)) - .unwrap() - .to_string(); + let code = + compile_program::(program, Some(OPT_NONE_2D_ZIG_ZAG)).unwrap(); println!("{code}"); - assert!(false); // TODO: add some test here for memory allocation + assert!(code.contains("v") || code.contains("^")); assert_eq!(run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "12345"); } @@ -2738,9 +2697,7 @@ cell i = 1; cell j = 1; "#; assert_eq!( - compile_program::(program, Some(OPT_NONE_SPIRAL)) - .unwrap() - .to_string(), + compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap(), "^+>+v+<+<+^+^+>+>+" ); } @@ -2766,11 +2723,9 @@ output g; output h; output i; "#; - let code = compile_program::(program, Some(OPT_NONE_SPIRAL)) - .unwrap() - .to_string(); + let code = compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap(); println!("{code}"); - assert!(false); // TODO: add some test here for memory allocation + assert!(code.contains("v") || code.contains("^")); assert_eq!( run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "123456789" @@ -2785,7 +2740,7 @@ cell a @(2, 4) = 1; cell[4] b @(0, 4); "#; assert_eq!( - compile_program::(program, Some(OPT_NONE_SPIRAL)).unwrap_err(), + compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap_err(), "Location specifier @(0,4) conflicts with another allocation" ); } @@ -2806,11 +2761,9 @@ output b[2]; output b[3]; output a; "#; - let code = compile_program::(program, Some(OPT_NONE_SPIRAL)) - .unwrap() - .to_string(); + let code = compile_program::(program, 
Some(OPT_NONE_2D_SPIRAL)).unwrap(); println!("{code}"); - assert!(false); // TODO: add some test here for memory allocation + assert!(code.contains("v") || code.contains("^")); assert_eq!(run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "12345"); } } From 1785626c0446dede8f1e1876ae068ed0387c6fe2 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 27 Oct 2025 10:48:42 +1100 Subject: [PATCH 22/56] Fix constants optimiser and some bf2d tests --- compiler/src/backend/bf2d.rs | 5 ++- compiler/src/backend/common.rs | 42 +++++++++---------- .../src/{ => backend}/constants_optimiser.rs | 24 ++++++----- compiler/src/backend/mod.rs | 2 + compiler/src/lib.rs | 1 - compiler/src/main.rs | 1 - compiler/src/tests.rs | 27 ++++++------ 7 files changed, 54 insertions(+), 48 deletions(-) rename compiler/src/{ => backend}/constants_optimiser.rs (87%) diff --git a/compiler/src/backend/bf2d.rs b/compiler/src/backend/bf2d.rs index 56b9446..c851226 100644 --- a/compiler/src/backend/bf2d.rs +++ b/compiler/src/backend/bf2d.rs @@ -257,7 +257,10 @@ impl CellAllocator for CellAllocatorData { loops += 1; } } else { - panic!("Memory Allocation Method not implemented"); + r_panic!( + "Memory allocation method {} not implemented.", + self.config.memory_allocation_method + ); } } diff --git a/compiler/src/backend/common.rs b/compiler/src/backend/common.rs index 051dfac..e5e47a4 100644 --- a/compiler/src/backend/common.rs +++ b/compiler/src/backend/common.rs @@ -1,5 +1,5 @@ +use super::constants_optimiser::calculate_optimal_addition; use crate::{ - constants_optimiser::calculate_optimal_addition, frontend::{CellLocation, Instruction, MemoryId}, macros::macros::{r_assert, r_panic}, misc::{MastermindConfig, MastermindContext}, @@ -229,28 +229,24 @@ outside of loop it was allocated" // TODO: fix bug, if only one multiplication then we can have a value already in the cell, but never otherwise // not sure if these optimisations should be in the builder step or in the compiler - // if 
self.config.optimise_constants { - // // ops.move_to_cell(&mut head_pos, cell); - // // here we use an algorithm that finds the best combo of products and constants to make the number to minimise bf code - // // first we get the closest allocated cell so we can calculate the distance cost of multiplying - // // TODO: instead find the nearest zero cell, doesn't matter if allocated or not - // let temp_cell = allocator.allocate_temp_cell(cell); - - // let optimised_ops = - // calculate_optimal_addition(imm as i8, ops.head_pos, cell, temp_cell); - - // ops.head_pos = optimised_ops.head_pos; - // ops.extend(optimised_ops.opcodes); - - // allocator.free(temp_cell, 1)?; - // } else { - // ops.move_to_cell(cell); - // ops.add_to_current_cell(imm as i8); - // } - - // TODO: fix optimisations - ops.move_to_cell(cell); - ops.add_to_current_cell(imm as i8); + if self.config.optimise_constants { + // ops.move_to_cell(&mut head_pos, cell); + // here we use an algorithm that finds the best combo of products and constants to make the number to minimise bf code + // first we get the closest allocated cell so we can calculate the distance cost of multiplying + // TODO: instead find the nearest zero cell, doesn't matter if allocated or not + let temp_cell = allocator.allocate_temp_cell(cell); + + let optimised_ops = + calculate_optimal_addition(imm as i8, ops.head_pos, cell, temp_cell); + + ops.extend(optimised_ops.opcodes); + ops.head_pos = optimised_ops.head_pos; + + allocator.free(temp_cell, 1)?; + } else { + ops.move_to_cell(cell); + ops.add_to_current_cell(imm as i8); + } if imm != 0 { if *alloc_loop_depth != current_loop_depth { diff --git a/compiler/src/constants_optimiser.rs b/compiler/src/backend/constants_optimiser.rs similarity index 87% rename from compiler/src/constants_optimiser.rs rename to compiler/src/backend/constants_optimiser.rs index dfa3505..8284740 100644 --- a/compiler/src/constants_optimiser.rs +++ b/compiler/src/backend/constants_optimiser.rs @@ -1,6 +1,6 
@@ -use crate::backend::{ - bf2d::{Opcode2D, TapeCell2D}, - common::{BrainfuckBuilder, BrainfuckBuilderData}, +use super::common::{ + BrainfuckBuilder, BrainfuckBuilderData, CellAllocator, CellAllocatorData, OpcodeVariant, + TapeCellVariant, }; // basically, most ascii characters are large numbers, which are more efficient to calculate with multiplication than with a bunch of + or - @@ -11,12 +11,16 @@ use crate::backend::{ // 7 * 4 : {>}(tricky)+++++++[<++++>-]< // 5 * 5 * 7 : +++++[>+++++<-]>[<+++++++>-]< // TODO: make unit tests for this -pub fn calculate_optimal_addition( +pub fn calculate_optimal_addition( value: i8, - start_cell: TapeCell2D, - target_cell: TapeCell2D, - temp_cell: TapeCell2D, -) -> BrainfuckBuilderData { + start_cell: TC, + target_cell: TC, + temp_cell: TC, +) -> BrainfuckBuilderData +where + BrainfuckBuilderData: BrainfuckBuilder, + CellAllocatorData: CellAllocator, +{ // can't abs() i8 directly because there is no +128i8, so abs(-128i8) crashes let abs_value = (value as i32).abs(); @@ -79,7 +83,7 @@ pub fn calculate_optimal_addition( ops.move_to_cell(temp_cell); ops.add_to_current_cell(a as i8); - ops.push(Opcode2D::OpenLoop); + ops.open_loop(); ops.add_to_current_cell(-1); ops.move_to_cell(target_cell); if value < 0 { @@ -88,7 +92,7 @@ pub fn calculate_optimal_addition( ops.add_to_current_cell(b as i8); } ops.move_to_cell(temp_cell); - ops.push(Opcode2D::CloseLoop); + ops.close_loop(); ops.move_to_cell(target_cell); if value < 0 { ops.add_to_current_cell(-(c as i8)); diff --git a/compiler/src/backend/mod.rs b/compiler/src/backend/mod.rs index 8683037..8f79770 100644 --- a/compiler/src/backend/mod.rs +++ b/compiler/src/backend/mod.rs @@ -2,3 +2,5 @@ pub mod common; pub mod bf; pub mod bf2d; + +mod constants_optimiser; diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 8c83c5f..cc64f8b 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -5,7 +5,6 @@ mod backend; mod brainfuck; mod brainfuck_optimiser; -mod 
constants_optimiser; mod frontend; mod macros; mod misc; diff --git a/compiler/src/main.rs b/compiler/src/main.rs index c4f393b..eea816f 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -5,7 +5,6 @@ mod backend; mod brainfuck; mod brainfuck_optimiser; -mod constants_optimiser; mod frontend; mod macros; mod misc; diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index bc06416..61839c6 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -2399,7 +2399,7 @@ output 'h'; "#; let code = compile_program::(program, Some(OPT_ALL)).unwrap(); println!("{code}"); - assert!(code.len() < 30); + assert!(code.len() < 35); assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "h"); } @@ -2416,7 +2416,7 @@ output a + 3; "#; let code = compile_program::(program, Some(OPT_ALL)).unwrap(); println!("{code}"); - assert!(code.len() < 100); + assert!(code.len() < 400); assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "tIJ"); } @@ -2424,8 +2424,8 @@ output a + 3; #[test] fn unimplemented_memory_allocation() { let program = r#" - cell[15] arr @1; - cell a = 'G'; +cell[15] arr @1; +cell a = 'G'; "#; let cfg = MastermindConfig { optimise_generated_code: false, @@ -2440,8 +2440,8 @@ output a + 3; enable_2d_grid: false, }; assert_eq!( - compile_program::(program, Some(cfg)).unwrap_err(), - "Memory Allocation Method not implemented" + compile_program::(program, Some(cfg)).unwrap_err(), + "Memory allocation method 128 not implemented." 
); } @@ -2526,7 +2526,7 @@ cell i = 1; cell j = 1; "#; assert_eq!( - compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap(), + compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap(), "+vv+^^+>vv+^+^+" ); } @@ -2552,7 +2552,8 @@ output g; output h; output i; "#; - let code = compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap(); + let code = + compile_program::(program, Some(OPT_NONE_2D_TILES)).unwrap(); println!("{code}"); assert!(code.contains("v") || code.contains("^")); assert_eq!( @@ -2723,7 +2724,8 @@ output g; output h; output i; "#; - let code = compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap(); + let code = + compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap(); println!("{code}"); assert!(code.contains("v") || code.contains("^")); assert_eq!( @@ -2740,8 +2742,8 @@ cell a @(2, 4) = 1; cell[4] b @(0, 4); "#; assert_eq!( - compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap_err(), - "Location specifier @(0,4) conflicts with another allocation" + compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap_err(), + "Location specifier @(0, 4) conflicts with another allocation" ); } @@ -2761,7 +2763,8 @@ output b[2]; output b[3]; output a; "#; - let code = compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap(); + let code = + compile_program::(program, Some(OPT_NONE_2D_SPIRAL)).unwrap(); println!("{code}"); assert!(code.contains("v") || code.contains("^")); assert_eq!(run_code(BVM_CONFIG_2D, &code, "", None).unwrap(), "12345"); From f3d4fdd63fb431b655c9d27614400a07123a62c8 Mon Sep 17 00:00:00 2001 From: Missing Date: Sun, 2 Nov 2025 19:17:15 -0600 Subject: [PATCH 23/56] Updated the stack module Much better efficiency code size-wise, although you can't push a zero onto a stack anymore. 
--- programs/std/stack | 257 +++++++++++++++++++-------------------------- 1 file changed, 109 insertions(+), 148 deletions(-) diff --git a/programs/std/stack b/programs/std/stack index 3981f18..950df56 100644 --- a/programs/std/stack +++ b/programs/std/stack @@ -1,191 +1,148 @@ -// stack program. by MSMissing on github - -// NOTE: STACK OVERFLOWS/UNDERFLOWS WILL FUCK UP YOUR PROGRAM -// Also, this has inline brainfuck in it, so it probably won't be readable. - -struct stack255 { - cell[510] arr; - cell one; - cell len; - cell end; -} - -struct stack32 { - cell[64] arr; - cell one; +struct stack32 { // STACK CANNOT CONTAIN ZEROES. cell len; - cell end; + cell zero; + cell[32] content; + cell end; // if this is not equal to zero, panic. } -// == DOCUMENTATION == - -// struct stack255 -- a stack object you can push to and pop from. -// can hold up to 255 values. - -// struct stack32 -- smaller stack. holds 32 values. - -// push(stack, x) -- pushes x to the stack - -// push_d(stack, x) -- pushes x to the stack, destroying it in the -// process. faster than push - -// push_safe(stack, x) -- pushes x to the stack, and outputs an error if -// if it can't. cannot end the program. +// DOCUMENTATION -// push_d_safe(stack, x) -- same as push_safe, but destructive +// struct stack32 - 32-cell stack; uses 35 cells. -// pop(stack, out) -- pulls the last value from the stack and puts it -// into out. +// push_d(stack, value) - push a value destructively to the stack. +// push(stack, value) - push a value to the stack. slower than push_d +// unshift_d(stack, value) - push a value destructively into the bottom of the stack. +// unshift(stack, value) - push a value into the bottom of the stack. -// pop_safe(stack, out) -- same as pop, but outputs an error if the stack -// underflows. cannot end the program. +// pop(stack, *output) - pop a value from the stack. +// shift(stack, *output) - shift a value from the bottom of the stack. +// shift is faster than pop. 
+// reverse(stack) - reverses the stack. +// clear(stack) - clears the stack. Suggested if a stack is about to go out of scope. -// REMEMBER TO INITIALIZE YOUR STACKS OR THE CODE WILL LIKELY BE SLOWER -fn init_stack(struct stack255 stack) { - stack.one = 1; -} - -fn init_stack(struct stack32 stack) { - stack.one = 1; -} - -fn move_to_current(cell one) { // move the pointer to the last value pushe - bf @one { // input is stack.one - [<<]>>> +fn __move_to_top(struct stack32 stack) { + bf @stack.zero clobbers *stack.content { + >[>]< } } -fn move_to_one() { // move pointer to stack.one after calling move_to_current() + +fn __back_to_zero() { bf { - <[>>]<< + [<] } } -fn push_d(struct stack255 stack, cell in) { // destructive version of push - stack.one = 1; +fn push_d(struct stack32 stack, cell value) { + value -= 1; stack.len += 1; - - // set usage flag - bf @stack.one clobbers *stack.arr { - [<<]+[>>]<< + bf @stack.zero clobbers *stack.content { + >[>]+ } - - // drain input to the array item - while in { - in -= 1; - move_to_current(stack.one); - bf clobbers *stack.arr {+} - move_to_one(); + __back_to_zero(); + while value { + value -= 1; + __move_to_top(stack); + bf clobbers *stack.content {+} + __back_to_zero(); } } -fn push_d(struct stack32 stack, cell in) { - stack.one = 1; - stack.len += 1; - - // set usage flag - bf @stack.one clobbers *stack.arr { - [<<]+[>>]<< - } - - // drain input to the array item - while in { - in -= 1; - move_to_current(stack.one); - bf clobbers *stack.arr {+} - move_to_one(); - } +fn push(struct stack32 stack, cell value) { + cell _value = value; + push_d(stack, _value); } -fn push(struct stack255 stack, cell x) { - cell in = x; // copy the input - push_d(stack, in); + +fn pop(struct stack32 stack, cell out) { + stack.len -= 1; + out = 0; + bf @stack.zero {>[>]<[>+<-]>} + bf { [-<< } + __back_to_zero(); + out += 1; + __move_to_top(stack); + bf { >>]<< } + __back_to_zero(); } -fn push(struct stack32 stack, cell x) { - cell in = x; - 
push_d(stack, x); +fn pop(struct stack32 stack) { + stack.len -= 1; + __move_to_top(stack); + bf clobbers *stack.content {[-]<} + __back_to_zero(); } -fn push_safe(struct stack255 stack, cell x) { - if stack.len - 255 { - push(stack, x); - } else { - output "STACK OVERFLOW"; - } +fn shift(struct stack32 stack, cell out) { + out = 0; + stack.len -= 1; + drain stack.content[0] into out; + bf @stack.content[1] clobbers *stack.content { [[<+>-]>]<<[<]>> } } -fn push_safe(struct stack32 stack, cell x) { - if stack.len - 32 { - push(stack, x); - } else { - output "STACK OVERFLOW"; - } +fn shift(struct stack32 stack) { + stack.len -= 1; + bf @stack.content[1] clobbers *stack.content { [[<+>-]>]<<[<]>> } +} + +fn unshift_d(struct stack32 stack, cell value) { + stack.len += 1; + __move_to_top(stack); + bf clobbers *stack.content { [[->+<]<] } + stack.content[0] = 0; + drain value into stack.content[0]; +} + +fn unshift(struct stack32 stack, cell value) { + cell _value = value; + unshift_d(stack, value); } -fn push_d_safe(struct stack255 stack, cell in) { - if stack.len - 255 { - push_d(stack, in); - } else { - in = 0; - output "STACK OVERFLOW"; +fn clear(struct stack32 stack) { + while stack.len { + pop(stack); } + assert *stack.content equals 0; } -fn push_d_safe(struct stack32 stack, cell in) { - if stack.len - 32 { - push_d(stack, in); - } else { - in = 0; - output "STACK OVERFLOW"; +fn move_stack(struct stack32 in, struct stack32 out) { // out must be empty + cell x; + while in.len { + shift(in, x); + push_d(out, x); } + assert *in.content equals 0; } -fn pop(struct stack255 stack, cell return) { - return = 0; - stack.one = 1; - stack.len -= 1; - - move_to_current(stack.one); - bf clobbers *stack.arr return {[-} - move_to_one(); - return += 1; - move_to_current(stack.one); - bf {]<->>>} - move_to_one(); -} - -fn pop(struct stack32 stack, cell return) { - return = 0; - stack.one = 1; - stack.len -= 1; - - move_to_current(stack.one); - bf clobbers *stack.arr return {[-} - 
move_to_one(); - return += 1; - move_to_current(stack.one); - bf clobbers *stack.arr {]<->>>} - move_to_one(); -} - -fn pop_safe(struct stack255 stack, cell return) { - if stack.len { - pop(stack, return); - } else { - return = 0; - output "STACK UNDERFLOW"; +fn copy_stack(struct stack32 src, struct stack32 out) { + cell x; + struct stack32 _src; + while src.len { + shift(src, x); + push(out, x); + push_d(_src, x); + } + while _src.len { + shift(_src, x); + push_d(src, x); } + assert *_src.content equals 0; } -fn pop_safe(struct stack32 stack, cell return) { - if stack.len { - pop(stack, return); - } else { - return = 0; - output "STACK UNDERFLOW"; +fn reverse(struct stack32 stack, struct stack32 out) { // out must be empty + cell x; + while stack.len { + pop(stack, x); + push_d(out, x); } + assert *stack.content equals 0; +} + +fn reverse(struct stack32 stack) { + struct stack32 out; + reverse(stack, out); + move_stack(out, stack); } @@ -194,3 +151,7 @@ fn pop_safe(struct stack32 stack, cell return) { + + + + From a4f85f419bee40b83e9a563dc64530ccbf27ba0e Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Fri, 7 Nov 2025 14:44:44 +1100 Subject: [PATCH 24/56] WIP: start parser rewrite --- compiler/src/backend/bf.rs | 2 +- compiler/src/backend/bf2d.rs | 2 +- compiler/src/backend/common.rs | 3 +- compiler/src/frontend.rs | 8 +- compiler/src/lib.rs | 3 - compiler/src/parser.rs | 1813 ----------------- compiler/src/parser/expressions.rs | 285 +++ compiler/src/parser/mod.rs | 5 + compiler/src/parser/old_parser.rs | 607 ++++++ .../{tokeniser.rs => parser/old_tokeniser.rs} | 132 +- compiler/src/parser/parser.rs | 473 +++++ compiler/src/parser/tests.rs | 370 ++++ compiler/src/parser/tokeniser.rs | 136 ++ compiler/src/parser/types.rs | 287 +++ 14 files changed, 2173 insertions(+), 1953 deletions(-) delete mode 100644 compiler/src/parser.rs create mode 100644 compiler/src/parser/expressions.rs create mode 100644 compiler/src/parser/mod.rs create mode 100644 
compiler/src/parser/old_parser.rs rename compiler/src/{tokeniser.rs => parser/old_tokeniser.rs} (69%) create mode 100644 compiler/src/parser/parser.rs create mode 100644 compiler/src/parser/tests.rs create mode 100644 compiler/src/parser/tokeniser.rs create mode 100644 compiler/src/parser/types.rs diff --git a/compiler/src/backend/bf.rs b/compiler/src/backend/bf.rs index 89a68eb..c1d8ff4 100644 --- a/compiler/src/backend/bf.rs +++ b/compiler/src/backend/bf.rs @@ -4,7 +4,7 @@ use super::common::{ }; use crate::{ macros::macros::{r_assert, r_panic}, - tokeniser::Token, + parser::types::Token, }; pub type TapeCell = i32; diff --git a/compiler/src/backend/bf2d.rs b/compiler/src/backend/bf2d.rs index c851226..1b1ecae 100644 --- a/compiler/src/backend/bf2d.rs +++ b/compiler/src/backend/bf2d.rs @@ -4,7 +4,7 @@ use super::common::{ }; use crate::{ macros::macros::{r_assert, r_panic}, - tokeniser::Token, + parser::types::Token, }; use std::{fmt::Display, hash::Hash}; diff --git a/compiler/src/backend/common.rs b/compiler/src/backend/common.rs index e5e47a4..bc9e744 100644 --- a/compiler/src/backend/common.rs +++ b/compiler/src/backend/common.rs @@ -3,8 +3,7 @@ use crate::{ frontend::{CellLocation, Instruction, MemoryId}, macros::macros::{r_assert, r_panic}, misc::{MastermindConfig, MastermindContext}, - parser::TapeCellLocation, - tokeniser::Token, + parser::types::{TapeCellLocation, Token}, }; use std::{ diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index 43d1342..c591578 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -10,8 +10,12 @@ use crate::{ macros::macros::{r_assert, r_panic}, misc::MastermindContext, parser::{ - Clause, Expression, ExtendedOpcode, LocationSpecifier, Reference, StructFieldDefinition, - VariableDefinition, VariableTarget, VariableTargetReferenceChain, VariableTypeReference, + expressions::Expression, + types::{ + Clause, ExtendedOpcode, LocationSpecifier, Reference, StructFieldDefinition, + 
VariableDefinition, VariableTarget, VariableTargetReferenceChain, + VariableTypeReference, + }, }, }; diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index cc64f8b..c3365e5 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -11,7 +11,6 @@ mod misc; mod parser; mod preprocessor; mod tests; -mod tokeniser; use crate::{ backend::{ bf::{Opcode, TapeCell}, @@ -20,9 +19,7 @@ use crate::{ }, brainfuck::{BrainfuckConfig, BrainfuckContext}, misc::MastermindContext, - parser::parse, preprocessor::preprocess_from_memory, - tokeniser::tokenise, }; // stdlib dependencies: diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs deleted file mode 100644 index 0bc6ef1..0000000 --- a/compiler/src/parser.rs +++ /dev/null @@ -1,1813 +0,0 @@ -// project dependencies: -use crate::{ - backend::{bf::TapeCell, bf2d::TapeCell2D, common::OpcodeVariant}, - macros::macros::{r_assert, r_panic}, - tokeniser::Token, -}; - -// stdlib dependencies -use std::{fmt::Display, mem::discriminant, num::Wrapping}; - -/// recursive function to create a tree representation of the program -pub fn parse( - tokens: &[Token], -) -> Result>, String> { - // basic steps: - // chew off tokens from the front, recursively parse blocks of tokens - let mut clauses = Vec::new(); - let mut i = 0usize; - while let Some(clause_tokens) = get_clause_tokens(&tokens[i..])? 
{ - match ( - &clause_tokens[0], - &clause_tokens.get(1).unwrap_or(&Token::None), - &clause_tokens.get(2).unwrap_or(&Token::None), - ) { - (Token::Cell, _, _) - | (Token::Struct, Token::Name(_), Token::Name(_) | Token::OpenSquareBracket) => { - clauses.push(parse_let_clause(clause_tokens)?); - } - (Token::Struct, Token::Name(_), Token::OpenBrace) => { - clauses.push(parse_struct_clause(clause_tokens)?); - } - (Token::Plus, Token::Plus, _) | (Token::Minus, Token::Minus, _) => { - clauses.push(parse_increment_clause(clause_tokens)?); - } - (Token::Name(_), Token::EqualsSign | Token::Dot | Token::OpenSquareBracket, _) => { - clauses.extend(parse_set_clause(clause_tokens)?); - } - (Token::Drain, _, _) => { - clauses.push(parse_drain_copy_clause(clause_tokens, true)?); - } - (Token::Copy, _, _) => { - clauses.push(parse_drain_copy_clause(clause_tokens, false)?); - } - (Token::While, _, _) => { - clauses.push(parse_while_clause(clause_tokens)?); - } - (Token::Output, _, _) => { - clauses.push(parse_output_clause(clause_tokens)?); - } - (Token::Input, _, _) => { - clauses.push(parse_input_clause(clause_tokens)?); - } - (Token::Name(_), Token::OpenParenthesis, _) => { - clauses.push(parse_function_call_clause(clause_tokens)?); - } - (Token::Fn, _, _) => { - clauses.push(parse_function_definition_clause(clause_tokens)?); - } - (Token::Name(_), Token::Plus | Token::Minus, Token::EqualsSign) => { - clauses.extend(parse_add_clause(clause_tokens)?); - } - (Token::If, _, _) => { - clauses.push(parse_if_else_clause(clause_tokens)?); - } - (Token::OpenBrace, _, _) => { - let braced_tokens = get_braced_tokens(clause_tokens, BRACES)?; - let inner_clauses = parse(braced_tokens)?; - clauses.push(Clause::Block(inner_clauses)); - } - (Token::Bf, _, _) => { - clauses.push(parse_brainfuck_clause(clause_tokens)?); - } - (Token::Assert, _, _) => clauses.push(parse_assert_clause(clause_tokens)?), - // empty clause - (Token::Semicolon, _, _) => (), - // the None token usually represents 
whitespace, it should be filtered out before reaching this function - // Wrote out all of these possibilities so that the compiler will tell me when I haven't implemented a token - ( - Token::None - | Token::Else - | Token::Not - | Token::ClosingBrace - | Token::OpenSquareBracket - | Token::ClosingSquareBracket - | Token::OpenParenthesis - | Token::ClosingParenthesis - | Token::LessThan - | Token::MoreThan - | Token::Comma - | Token::Plus - | Token::Minus - | Token::Into - | Token::Digits(_) - | Token::Name(_) - | Token::String(_) - | Token::Character(_) - | Token::True - | Token::False - | Token::EqualsSign - | Token::Asterisk - | Token::Clobbers - | Token::Equals - | Token::Unknown - | Token::Dot - | Token::At - | Token::Struct - | Token::Caret, - _, - _, - ) => r_panic!("Invalid clause: {clause_tokens:#?}"), - }; - i += clause_tokens.len(); - } - - Ok(clauses) -} - -fn parse_let_clause(clause: &[Token]) -> Result, String> { - // cell x = 0; - // struct DummyStruct y - let mut i = 0usize; - // this kind of logic could probably be done with iterators, (TODO for future refactors) - - let (var, len) = parse_var_definition(&clause[i..], true)?; - i += len; - - if let Token::EqualsSign = &clause[i] { - i += 1; - let remaining = &clause[i..(clause.len() - 1)]; - let expr = Expression::parse(remaining)?; - // equivalent to set clause stuff - // except we need to convert a variable definition to a variable target - Ok(Clause::DefineVariable { var, value: expr }) - } else if i < (clause.len() - 1) { - r_panic!("Invalid token in let clause: {clause:#?}"); - } else { - Ok(Clause::DeclareVariable { var }) - } -} - -/// Parse tokens representing a struct definition into a clause -fn parse_struct_clause(clause: &[Token]) -> Result, String> { - let mut i = 0usize; - let Token::Struct = &clause[i] else { - r_panic!("Expected struct keyword in struct clause. This should never occur. 
{clause:#?}"); - }; - i += 1; - - let Token::Name(struct_name) = &clause[i] else { - r_panic!("Expected identifier in struct clause. This should never occur. {clause:#?}"); - }; - i += 1; - - let Token::OpenBrace = &clause[i] else { - r_panic!("Expected open brace in struct clause: {clause:#?}"); - }; - let braced_tokens = get_braced_tokens(&clause[i..], BRACES)?; - - let mut fields = vec![]; - - let mut j = 0usize; - loop { - let (field, len) = parse_var_definition::(&braced_tokens[j..], true)?; - j += len; - fields.push(field.try_into()?); - r_assert!( - j <= braced_tokens.len(), - "Struct definition field exceeded braces. This should never occur. {clause:#?}" - ); - let Token::Semicolon = &braced_tokens[j] else { - r_panic!("Expected semicolon in struct definition field: {clause:#?}"); - }; - j += 1; - if j == braced_tokens.len() { - break; - } - } - r_assert!( - j == braced_tokens.len(), - "Struct definitions exceeded braces. This should never occur. {clause:#?}" - ); - // i += j + 2; - - Ok(Clause::DefineStruct { - name: struct_name.clone(), - fields, - }) -} - -fn parse_add_clause(clause: &[Token]) -> Result>, String> { - let mut clauses = Vec::new(); - let mut i = 0usize; - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - let positive = match &clause[i] { - Token::Plus => true, - Token::Minus => false, - _ => { - r_panic!("Unexpected second token in add clause: {clause:#?}"); - } - }; - i += 2; // assume the equals sign is there because it was checked by the main loop - let raw_expr = Expression::parse(&clause[i..(clause.len() - 1)])?; - let expr = match positive { - true => raw_expr, - false => Expression::SumExpression { - sign: Sign::Negative, - summands: vec![raw_expr], - }, - }; - //Check if this add clause self references - let self_referencing = expr.check_self_referencing(&var); - - clauses.push(Clause::AddToVariable { - var, - value: expr, - self_referencing: self_referencing, - }); - - Ok(clauses) -} - -// currently just syntax 
sugar, should make it actually do post/pre increments -fn parse_increment_clause(clause: &[Token]) -> Result, String> { - let (var, _) = parse_var_target(&clause[2..])?; - //An increment clause can never be self referencing since it just VAR++ - Ok(match (&clause[0], &clause[1]) { - (Token::Plus, Token::Plus) => Clause::AddToVariable { - var, - value: Expression::NaturalNumber(1), - self_referencing: false, - }, - (Token::Minus, Token::Minus) => Clause::AddToVariable { - var, - value: Expression::NaturalNumber((-1i8 as u8) as usize), - self_referencing: false, - }, - _ => { - r_panic!("Invalid pattern in increment clause: {clause:#?}"); - } - }) - // assumed that the final token is a semicolon -} - -fn parse_set_clause(clause: &[Token]) -> Result>, String> { - // TODO: what do we do about arrays and strings and structs? - let mut clauses = Vec::new(); - let mut i = 0usize; - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - // definitely could use iterators instead (TODO for refactor) - match &clause[i] { - Token::EqualsSign => { - i += 1; - let expr = Expression::parse(&clause[i..(clause.len() - 1)])?; - let self_referencing = expr.check_self_referencing(&var); - clauses.push(Clause::SetVariable { - var, - value: expr, - self_referencing, - }); - } - Token::Plus | Token::Minus => { - let is_add = if let Token::Plus = &clause[i] { - true - } else { - false - }; - i += 1; - let Token::EqualsSign = &clause[i] else { - r_panic!("Expected equals sign in add-assign operator: {clause:#?}"); - }; - i += 1; - - let mut expr = Expression::parse(&clause[i..(clause.len() - 1)])?; - if !is_add { - expr = expr.flipped_sign()?; - } - - let self_referencing = expr.check_self_referencing(&var); - clauses.push(Clause::AddToVariable { - var, - value: expr, - self_referencing, - }); - } - _ => r_panic!("Expected assignment operator in set clause: {clause:#?}"), - } - - Ok(clauses) -} - -fn parse_drain_copy_clause( - clause: &[Token], - is_draining: bool, -) -> Result, 
String> { - // drain g {i += 1;}; - // drain g into j; - // copy foo into bar {g += 2; etc;}; - // TODO: make a tuple-parsing function and use it here instead of a space seperated list of targets - - let mut targets = Vec::new(); - let mut block = Vec::new(); - let mut i = 1usize; - - let condition_start_token = i; - - i += 1; - while let Some(token) = clause.get(i) { - if let Token::Into | Token::OpenBrace | Token::Semicolon = token { - break; - } - i += 1; - } - r_assert!( - i < clause.len(), - "Expected source expression in draining/copying loop: {clause:#?}" - ); - - let source = Expression::parse(&clause[condition_start_token..i])?; - - if let Token::Into = &clause[i] { - // simple drain/copy move operations - i += 1; - - loop { - match &clause[i] { - Token::Name(_) | Token::Asterisk => { - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - targets.push(var); - } - Token::OpenBrace | Token::Semicolon => { - break; - } - _ => { - r_panic!("Unexpected token in drain clause: {clause:#?}"); - } - } - } - } - - if let Token::OpenBrace = &clause[i] { - // code block to execute at each loop iteration - let braced_tokens = get_braced_tokens(&clause[i..], BRACES)?; - // recursion - block.extend(parse(braced_tokens)?); - // i += 2 + braced_tokens.len(); - } - - Ok(Clause::CopyLoop { - source, - targets, - block, - is_draining, - }) -} - -fn parse_while_clause( - clause: &[Token], -) -> Result, String> { - // TODO: make this able to accept expressions - let mut i = 1usize; - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - // let expr = parse_expression(&clause[1..i]); - let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - // i += 2 + block_tokens.len(); - - Ok(Clause::WhileLoop { - var, - block: parse(block_tokens)?, - }) -} - -fn parse_if_else_clause( - clause: &[Token], -) -> Result, String> { - // skip first token, assumed to start with if - let mut i = 1usize; - let mut not = false; - if let Token::Not = &clause[i] { - not = 
true; - i += 1; - } - - let condition_start_token = i; - - i += 1; - while let Some(token) = clause.get(i) { - if let Token::OpenBrace = token { - break; - } - i += 1; - } - r_assert!( - i < clause.len(), - "Expected condition and block in if statement: {clause:#?}" - ); - - let condition = Expression::parse(&clause[condition_start_token..i])?; - - let block_one = { - let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - i += 2 + block_tokens.len(); - parse(block_tokens)? - }; - - let block_two = if let Some(Token::Else) = &clause.get(i) { - i += 1; - let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - // i += 2 + block_tokens.len(); - Some(parse(block_tokens)?) - } else { - None - }; - - Ok(match (not, block_one, block_two) { - (false, block_one, block_two) => Clause::IfElse { - condition, - if_block: Some(block_one), - else_block: block_two, - }, - (true, block_one, block_two) => Clause::IfElse { - condition, - if_block: block_two, - else_block: Some(block_one), - }, - }) -} - -fn parse_output_clause(clause: &[Token]) -> Result, String> { - let mut i = 1usize; - - let expr_tokens = &clause[i..(clause.len() - 1)]; - let expr = Expression::parse(expr_tokens)?; - i += expr_tokens.len(); - - let Token::Semicolon = &clause[i] else { - r_panic!("Invalid token at end of output clause: {clause:#?}"); - }; - - Ok(Clause::OutputValue { value: expr }) -} - -fn parse_input_clause(clause: &[Token]) -> Result, String> { - let mut i = 1usize; - - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - let Token::Semicolon = &clause[i] else { - r_panic!("Invalid token at end of input clause: {clause:#?}"); - }; - - Ok(Clause::InputVariable { var }) -} - -fn parse_assert_clause(clause: &[Token]) -> Result, String> { - let mut i = 1usize; - - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - if let Token::Unknown = &clause[i] { - Ok(Clause::AssertVariableValue { var, value: None }) - } else { - let Token::Equals = &clause[i] else { - 
r_panic!("Expected assertion value in assert clause: {clause:#?}"); - }; - i += 1; - - let Token::Semicolon = &clause[clause.len() - 1] else { - r_panic!("Invalid token at end of assert clause: {clause:#?}"); - }; - - let remaining = &clause[i..(clause.len() - 1)]; - let expr = Expression::parse(remaining)?; - - Ok(Clause::AssertVariableValue { - var, - value: Some(expr), - }) - } -} - -fn parse_integer(tokens: &[Token]) -> Result<(i32, usize), String> { - let mut i = 0; - let mut positive = true; - if let Token::Minus = &tokens[i] { - i += 1; - positive = false; - } - - let Token::Digits(digits) = &tokens[i] else { - r_panic!("Expected number after \"-\" in integer"); - }; - i += 1; - - // TODO: error handling - let offset = digits.parse::().unwrap(); - Ok((if positive { offset } else { -offset }, i)) -} - -fn parse_integer_tuple( - tokens: &[Token], -) -> Result<([i32; LENGTH], usize), String> { - let mut i = 0; - r_assert!( - matches!(&tokens[i], Token::OpenParenthesis), - "Expected opening parenthesis in tuple: {:?}", - &tokens[0..(tokens.len().min(5))] - ); - i += 1; - - let mut tuple = [0; LENGTH]; - for (j, element) in tuple.iter_mut().enumerate() { - let (offset, len) = parse_integer(&tokens[i..])?; - i += len; - *element = offset; - - if j < LENGTH - 1 { - r_assert!( - matches!(&tokens[i], Token::Comma), - "Expected comma in tuple: {:?}", - &tokens[0..(tokens.len().min(5))] - ); - i += 1; - } - } - r_assert!( - matches!(&tokens[i], Token::ClosingParenthesis), - "Expected closing parenthesis in tuple: {:?}", - &tokens[0..(tokens.len().min(5))] - ); - i += 1; - - Ok((tuple, i)) -} - -pub trait TapeCellLocation -where - Self: Sized + Display, -{ - /// parse any memory location specifiers - /// let g @(4,2) = 68; - /// or - /// let p @3 = 68; - fn parse_location_specifier( - tokens: &[Token], - ) -> Result<(LocationSpecifier, usize), String>; - - /// safely cast a 2D or 1D location specifier into a 1D non-negative cell offset, - /// for use with struct fields 
- fn to_positive_cell_offset(&self) -> Result; -} - -impl TapeCellLocation for TapeCell { - fn parse_location_specifier( - tokens: &[Token], - ) -> Result<(LocationSpecifier, usize), String> { - if tokens.len() == 0 { - return Ok((LocationSpecifier::None, 0)); - } - let Token::At = &tokens[0] else { - return Ok((LocationSpecifier::None, 0)); - }; - - let mut i = 1; - let location_specifier = match &tokens[i] { - Token::Minus | Token::Digits(_) => { - let (offset, len) = parse_integer(&tokens[i..])?; - i += len; - LocationSpecifier::Cell(offset) - } - Token::Name(_) => { - // variable location specifier - let (var, len) = parse_var_target(&tokens[i..])?; - i += len; - - LocationSpecifier::Variable(var) - } - _ => r_panic!( - "Invalid location specifier: {:?}", - &tokens[0..(tokens.len().min(5))] - ), - }; - - Ok((location_specifier, i)) - } - - fn to_positive_cell_offset(&self) -> Result { - r_assert!(*self >= 0, "Expected non-negative cell offset."); - Ok(*self as usize) - } -} - -impl TapeCellLocation for TapeCell2D { - fn parse_location_specifier( - tokens: &[Token], - ) -> Result<(LocationSpecifier, usize), String> { - if tokens.len() == 0 { - return Ok((LocationSpecifier::None, 0)); - } - let Token::At = &tokens[0] else { - return Ok((LocationSpecifier::None, 0)); - }; - - let mut i = 1; - let location_specifier = match &tokens[i] { - Token::OpenParenthesis => { - // parse a 2-tuple - let (tuple, len) = parse_integer_tuple::<2>(&tokens[i..])?; - i += len; - - LocationSpecifier::Cell(TapeCell2D(tuple[0], tuple[1])) - } - Token::Digits(_) | Token::Minus => { - let (x_offset, len) = parse_integer(&tokens[i..])?; - i += len; - - LocationSpecifier::Cell(TapeCell2D(x_offset, 0)) - } - Token::Name(_) => { - // variable location specifier - let (var, len) = parse_var_target(&tokens[i..])?; - i += len; - - LocationSpecifier::Variable(var) - } - _ => r_panic!("Expected constant or variable in location specifier: {tokens:#?}"), - }; - - Ok((location_specifier, i)) - } - - 
fn to_positive_cell_offset(&self) -> Result { - r_assert!( - self.1 == 0 && self.0 >= 0, - "Expected non-negative 1st dimensional cell offset (i.e. (x,y) where y=0)." - ); - Ok(self.0 as usize) - } -} - -fn parse_brainfuck_clause( - clause: &[Token], -) -> Result, String> { - // bf {++--<><} - // bf @3 {++--<><} - // bf clobbers var1 var2 {++--<><} - // bf @2 clobbers *arr {++--<><} - - let mut clobbers = Vec::new(); - let mut i = 1usize; - - // check for location specifier - let (mem_offset, len) = TC::parse_location_specifier(&clause[i..])?; - i += len; - - if let Token::Clobbers = &clause[i] { - i += 1; - - loop { - match &clause[i] { - Token::Name(_) | Token::Asterisk => { - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - clobbers.push(var); - } - Token::OpenBrace => { - break; - } - _ => { - r_panic!("Unexpected token in drain clause: {clause:#?}"); - } - } - } - } - - let bf_tokens = get_braced_tokens(&clause[i..], BRACES)?; - let mut ops = Vec::new(); - let mut j = 0; - while j < bf_tokens.len() { - match &bf_tokens[j] { - Token::OpenBrace => { - // embedded mastermind - let block_tokens = get_braced_tokens(&bf_tokens[j..], BRACES)?; - let clauses = parse(block_tokens)?; - ops.push(ExtendedOpcode::Block(clauses)); - j += block_tokens.len() + 2; - } - token @ _ => { - ops.push(ExtendedOpcode::Opcode(OC::from_token(token)?)); - j += 1; - } - } - } - - Ok(Clause::InlineBrainfuck { - location_specifier: mem_offset, - clobbered_variables: clobbers, - operations: ops, - }) -} - -fn parse_function_definition_clause( - clause: &[Token], -) -> Result, String> { - let mut i = 1usize; - // function name - let Token::Name(name) = &clause[i] else { - r_panic!("Expected function name after in function definition clause: {clause:#?}"); - }; - let mut args = Vec::new(); - i += 1; - let Token::OpenParenthesis = &clause[i] else { - r_panic!("Expected argument list in function definition clause: {clause:#?}"); - }; - let arg_tokens = 
get_braced_tokens(&clause[i..], PARENTHESES)?; - let mut j = 0usize; - // parse function argument names - while j < arg_tokens.len() { - // break if no more arguments - let (Token::Cell | Token::Struct) = &arg_tokens[j] else { - break; - }; - let (var, len) = parse_var_definition(&arg_tokens[j..], false)?; - j += len; - - args.push(var); - - if j >= arg_tokens.len() { - break; - } else if let Token::Comma = &arg_tokens[j] { - j += 1; - } else { - r_panic!("Unexpected token in function definition arguments: {arg_tokens:#?}"); - } - } - - i += 2 + arg_tokens.len(); - - // recursively parse the inner block - let Token::OpenBrace = &clause[i] else { - r_panic!("Expected execution block in function definition: {clause:#?}"); - }; - - let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - let parsed_block = parse(block_tokens)?; - - Ok(Clause::DefineFunction { - name: name.clone(), - arguments: args, - block: parsed_block, - }) -} - -fn parse_function_call_clause(clause: &[Token]) -> Result, String> { - let mut i = 0usize; - // Okay I didn't know this rust syntax, could have used it all over the place - let Token::Name(name) = &clause[i] else { - r_panic!("Expected function identifier at start of function call clause: {clause:#?}"); - }; - let mut args = Vec::new(); - i += 1; - - let Token::OpenParenthesis = &clause[i] else { - r_panic!("Expected argument list in function call clause: {clause:#?}"); - }; - let arg_tokens = get_braced_tokens(&clause[i..], PARENTHESES)?; - - let mut j = 0usize; - while j < arg_tokens.len() { - // this used to be in the while condition but moved it here to check for the case of no arguments - let Token::Name(_) = &arg_tokens[j] else { - break; - }; - let (var, len) = parse_var_target(&arg_tokens[j..])?; - j += len; - - args.push(var); - - if j >= arg_tokens.len() { - break; - } else if let Token::Comma = &arg_tokens[j] { - j += 1; - } else { - r_panic!("Unexpected token in function call arguments: {arg_tokens:#?}"); - } - } - - i += 
2 + arg_tokens.len(); - - let Token::Semicolon = &clause[i] else { - r_panic!("Expected clause delimiter at end of function call clause: {clause:#?}"); - }; - - Ok(Clause::CallFunction { - function_name: name.clone(), - arguments: args, - }) -} - -fn parse_var_target(tokens: &[Token]) -> Result<(VariableTarget, usize), String> { - let mut i = 0usize; - let is_spread = if let Token::Asterisk = &tokens[i] { - i += 1; - true - } else { - false - }; - - let Token::Name(var_name) = &tokens[i] else { - r_panic!("Expected identifier in variable target identifier: {tokens:#?}"); - }; - i += 1; - - let mut ref_chain = vec![]; - while i < tokens.len() { - match &tokens[i] { - Token::OpenSquareBracket => { - let (index, tokens_used) = parse_subscript(&tokens[i..])?; - i += tokens_used; - ref_chain.push(Reference::Index(index)); - } - Token::Dot => { - i += 1; - let Token::Name(subfield_name) = &tokens[i] else { - r_panic!("Expected subfield name in variable target identifier: {tokens:#?}"); - }; - i += 1; - - ref_chain.push(Reference::NamedField(subfield_name.clone())); - } - _ => { - break; - } - } - } - - Ok(( - VariableTarget { - name: var_name.clone(), - subfields: if ref_chain.len() > 0 { - Some(VariableTargetReferenceChain(ref_chain)) - } else { - None - }, - is_spread, - }, - i, - )) -} - -/// convert tokens of a variable definition into data representation, e.g. 
`cell x`, `struct G g`, `cell[5] x_arr`, `struct H[100] hs` -fn parse_var_definition( - tokens: &[Token], - allow_location: bool, -) -> Result<(VariableDefinition, usize), String> { - let mut i = 0usize; - let mut var_type = match &tokens[i] { - Token::Cell => { - i += 1; - - VariableTypeReference::Cell - } - Token::Struct => { - i += 1; - - let Token::Name(struct_name) = &tokens[i] else { - r_panic!("Expected struct type name in variable definition: {tokens:#?}"); - }; - i += 1; - - VariableTypeReference::Struct(struct_name.clone()) - } - _ => { - r_panic!("Unexpected token in variable definition, this should not occur: {tokens:#?}") - } - }; - - // parse array specifiers - while let Token::OpenSquareBracket = &tokens[i] { - let (len, j) = parse_array_length(&tokens[i..])?; - i += j; - - var_type = VariableTypeReference::Array(Box::new(var_type), len); - } - - let Token::Name(var_name) = &tokens[i] else { - r_panic!("Expected identifier in variable definition: {tokens:#?}"); - }; - i += 1; - - let (location_specifier, len) = TC::parse_location_specifier(&tokens[i..])?; - - r_assert!( - location_specifier.is_none() || allow_location, - "Unexpected location specifier in variable definition: {tokens:#?}" - ); - i += len; - - Ok(( - VariableDefinition { - var_type, - name: var_name.clone(), - location_specifier, - }, - i, - )) -} - -/// parse the subscript of an array variable, e.g. [4] [6] -/// must be compile-time constant -/// returns (array length, tokens used) -/// assumes the first token is an open square bracket -fn parse_subscript(tokens: &[Token]) -> Result<(usize, usize), String> { - let mut i = 0usize; - let subscript = get_braced_tokens(&tokens[i..], SQUARE_BRACKETS)?; - let Expression::NaturalNumber(len) = Expression::parse(subscript)? 
else { - r_panic!("Expected a compile-time constant in subscript: {tokens:#?}"); - }; - - i += 2 + subscript.len(); - - Ok((len, i)) -} - -/// parse_array_subscript but with a length check -fn parse_array_length(tokens: &[Token]) -> Result<(usize, usize), String> { - let (len, i) = parse_subscript(tokens)?; - r_assert!(len > 0, "Array variable cannot be zero-length: {tokens:#?}"); - Ok((len, i)) -} - -/// get a clause's tokens, typically a line, bounded by ; -fn get_clause_tokens(tokens: &[Token]) -> Result, String> { - if tokens.len() == 0 { - Ok(None) - } else { - let mut i = 0usize; - while i < tokens.len() { - match tokens[i] { - Token::OpenBrace => { - let braced_block = get_braced_tokens(&tokens[i..], BRACES)?; - i += 2 + braced_block.len(); - // handle blocks marking the end of clauses, if/else being the exception - if i < tokens.len() { - if let Token::Else = tokens[i] { - i += 1; - let else_block = get_braced_tokens(&tokens[i..], BRACES)?; - i += 2 + else_block.len(); - } - } - return Ok(Some(&tokens[..i])); - } - Token::Semicolon => { - i += 1; - return Ok(Some(&tokens[..i])); - } - _ => { - i += 1; - } - } - } - - r_panic!("No clause could be found in: {tokens:#?}"); - } -} - -const SQUARE_BRACKETS: (Token, Token) = (Token::OpenSquareBracket, Token::ClosingSquareBracket); -const BRACES: (Token, Token) = (Token::OpenBrace, Token::ClosingBrace); -const PARENTHESES: (Token, Token) = (Token::OpenParenthesis, Token::ClosingParenthesis); -const ANGLED_BRACKETS: (Token, Token) = (Token::LessThan, Token::MoreThan); -// this should be a generic function but rust doesn't support enum variants as type arguments yet -// find tokens bounded by matching brackets -// TODO: make an impl for &[Token] and put all these functions in it -fn get_braced_tokens(tokens: &[Token], braces: (Token, Token)) -> Result<&[Token], String> { - let (open_brace, closing_brace) = (discriminant(&braces.0), discriminant(&braces.1)); - // find corresponding bracket, the depth check is 
unnecessary but whatever - let len = { - let mut i = 1usize; - let mut depth = 1; - while i < tokens.len() && depth > 0 { - let g = discriminant(&tokens[i]); - if g == open_brace { - depth += 1; - } else if g == closing_brace { - depth -= 1; - } - i += 1; - } - i - }; - - if len >= 2 { - if open_brace == discriminant(&tokens[0]) && closing_brace == discriminant(&tokens[len - 1]) - { - return Ok(&tokens[1..(len - 1)]); - } - } - r_panic!("Invalid braced tokens: {tokens:#?}"); -} - -impl Expression { - // Iterators? - // TODO: support post/pre increment in expressions - fn parse(tokens: &[Token]) -> Result { - let mut i = 0usize; - - if let Token::String(s) = &tokens[i] { - i += 1; - r_assert!( - i == tokens.len(), - "Expected semicolon after string literal {tokens:#?}" - ); - return Ok(Expression::StringLiteral(s.clone())); - } - - if let Token::OpenSquareBracket = &tokens[i] { - let braced_tokens = get_braced_tokens(&tokens[i..], SQUARE_BRACKETS)?; - i += 2 + braced_tokens.len(); - r_assert!( - i == tokens.len(), - "Expected semicolon after array literal {tokens:#?}" - ); - // parse the array - let results: Result, String> = braced_tokens - .split(|t| if let Token::Comma = t { true } else { false }) - .map(Self::parse) - .collect(); - // TODO: why do I need to split collect result into a seperate variable like here? 
- return Ok(Expression::ArrayLiteral(results?)); - } - - let mut current_sign = Some(Sign::Positive); // by default the first summand is positive - let mut summands = Vec::new(); - while i < tokens.len() { - match (¤t_sign, &tokens[i]) { - (None, Token::Plus) => { - current_sign = Some(Sign::Positive); - i += 1; - } - (None, Token::Minus) => { - current_sign = Some(Sign::Negative); - i += 1; - } - (Some(Sign::Positive), Token::Minus) => { - current_sign = Some(Sign::Negative); - i += 1; - } - (Some(Sign::Negative), Token::Minus) => { - current_sign = Some(Sign::Positive); - i += 1; - } - (Some(sign), Token::Digits(literal)) => { - let parsed_int: usize = literal.parse().unwrap(); - i += 1; - match sign { - Sign::Positive => summands.push(Expression::NaturalNumber(parsed_int)), - Sign::Negative => summands.push(Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::NaturalNumber(parsed_int)], - }), - } - current_sign = None; - } - (Some(sign), token @ (Token::True | Token::False)) => { - let parsed_int = match token { - Token::True => 1, - Token::False | _ => 0, - }; - i += 1; - summands.push(match sign { - Sign::Positive => Expression::NaturalNumber(parsed_int), - Sign::Negative => Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::NaturalNumber(parsed_int)], - }, - }); - current_sign = None; - } - (Some(sign), Token::Character(chr)) => { - let chr_int: usize = *chr as usize; - - r_assert!( - chr_int < 0xff, - "Character tokens must be single-byte: {chr}" - ); - - i += 1; - summands.push(match sign { - Sign::Positive => Expression::NaturalNumber(chr_int), - Sign::Negative => Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::NaturalNumber(chr_int)], - }, - }); - current_sign = None; - } - (Some(sign), Token::Name(_) | Token::Asterisk) => { - let (var, len) = parse_var_target(&tokens[i..])?; - i += len; - summands.push(match sign { - Sign::Positive => 
Expression::VariableReference(var), - Sign::Negative => Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::VariableReference(var)], - }, - }); - current_sign = None; - } - (Some(sign), Token::OpenParenthesis) => { - let braced_tokens = get_braced_tokens(&tokens[i..], PARENTHESES)?; - i += 2 + braced_tokens.len(); - let braced_expr = Self::parse(braced_tokens)?; - // probably inefficent but everything needs to be flattened at some point anyway so won't matter - // TODO: make expression structure more efficient (don't use vectors every time there is a negative) - summands.push(match (sign, braced_expr.clone()) { - ( - Sign::Negative, - Expression::NaturalNumber(_) | Expression::VariableReference(_), - ) => Expression::SumExpression { - sign: Sign::Negative, - summands: vec![braced_expr], - }, - ( - Sign::Negative, - Expression::SumExpression { - sign: Sign::Negative, - summands, - }, - ) => Expression::SumExpression { - sign: Sign::Positive, - summands, - }, - ( - Sign::Negative, - Expression::SumExpression { - sign: Sign::Positive, - summands, - }, - ) => Expression::SumExpression { - sign: Sign::Negative, - summands, - }, - _ => braced_expr, - }); - current_sign = None; - } - token => { - r_panic!("Unexpected token {token:#?} found in expression: {tokens:#?}"); - } - } - } - - Ok(match summands.len() { - 1 => summands.into_iter().next().unwrap(), - 1.. 
=> Expression::SumExpression { - sign: Sign::Positive, - summands, - }, - _ => r_panic!("Expected value in expression: {tokens:#?}"), - }) - } - - /// flip the sign of an expression, equivalent to `x => -(x)` - pub fn flipped_sign(self) -> Result { - Ok(match self { - Expression::SumExpression { sign, summands } => Expression::SumExpression { - sign: sign.flipped(), - summands, - }, - Expression::NaturalNumber(_) | Expression::VariableReference(_) => { - Expression::SumExpression { - sign: Sign::Negative, - summands: vec![self], - } - } - Expression::ArrayLiteral(_) | Expression::StringLiteral(_) => { - r_panic!( - "Attempted to invert sign of array or string literal, \ - do not use += or -= on arrays or strings." - ); - } - }) - } - - // not sure if this is the compiler's concern or if it should be the parser - // (constant to add, variables to add, variables to subtract) - // currently multiplication is not supported so order of operations and flattening is very trivial - // If we add multiplication in future it will likely be constant multiplication only, so no variable on variable multiplication - pub fn flatten(&self) -> Result<(u8, Vec, Vec), String> { - let expr = self; - let mut imm_sum = Wrapping(0u8); - let mut additions = Vec::new(); - let mut subtractions = Vec::new(); - - match expr { - Expression::SumExpression { sign, summands } => { - let results: Result, Vec)>, String> = - summands.into_iter().map(|expr| expr.flatten()).collect(); - let flattened = results? 
- .into_iter() - .reduce(|acc, (imm, adds, subs)| { - ( - (Wrapping(acc.0) + Wrapping(imm)).0, - [acc.1, adds].concat(), - [acc.2, subs].concat(), - ) - }) - .unwrap_or((0, vec![], vec![])); - - match sign { - Sign::Positive => { - imm_sum += flattened.0; - additions.extend(flattened.1); - subtractions.extend(flattened.2); - } - Sign::Negative => { - imm_sum -= flattened.0; - subtractions.extend(flattened.1); - additions.extend(flattened.2); - } - }; - } - Expression::NaturalNumber(number) => { - imm_sum += Wrapping(*number as u8); - } - Expression::VariableReference(var) => { - additions.push(var.clone()); - } - Expression::ArrayLiteral(_) | Expression::StringLiteral(_) => { - r_panic!("Attempt to flatten an array-like expression: {expr:#?}"); - } - } - - Ok((imm_sum.0, additions, subtractions)) - } - - //Recursively Check If This Is Self Referencing - pub fn check_self_referencing(&self, parent: &VariableTarget) -> bool { - // TODO: make sure nested values work correctly - match self { - Expression::SumExpression { - sign: _sign, - summands, - } => summands - .iter() - .any(|summand| summand.check_self_referencing(parent)), - Expression::VariableReference(var) => *var == *parent, - Expression::ArrayLiteral(_) - | Expression::StringLiteral(_) - | Expression::NaturalNumber(_) => false, - } - } -} - -// TODO: add multiplication -// yes, but no variable * variable multiplication or division -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -pub enum Expression { - SumExpression { - sign: Sign, - summands: Vec, - }, - NaturalNumber(usize), - VariableReference(VariableTarget), - ArrayLiteral(Vec), - StringLiteral(String), -} - -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -pub enum Sign { - Positive, - Negative, -} -impl Sign { - fn flipped(self) -> Sign { - match self { - Sign::Positive => Sign::Negative, - Sign::Negative => Sign::Positive, - } - } -} - -/// Clause type type variables: -/// - TC: TapeCell can be changed to implement 2D 
brainfuck, or other modifications -/// - OC: Opcode represents the valid Brainfuck Opcodes that we're generating (also used for 2D or other BF variants) -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -pub enum Clause { - DeclareVariable { - var: VariableDefinition, - }, - DefineVariable { - var: VariableDefinition, - value: Expression, - }, - DefineStruct { - name: String, - fields: Vec, - }, - AddToVariable { - var: VariableTarget, - value: Expression, - self_referencing: bool, - }, - SetVariable { - var: VariableTarget, - value: Expression, - self_referencing: bool, - }, - AssertVariableValue { - var: VariableTarget, - // Some(constant) indicates we know the value, None indicates we don't know the value - // typically will either be used for assert unknown or assert 0 - value: Option, - }, - CopyLoop { - source: Expression, - targets: Vec, - block: Vec>, - is_draining: bool, - }, - WhileLoop { - var: VariableTarget, - block: Vec>, - }, - OutputValue { - value: Expression, - }, - InputVariable { - var: VariableTarget, - }, - DefineFunction { - name: String, - // TODO: fix the type here, as function definitions don't actually need location specifiers and therefore don't need a tape cell type - arguments: Vec>, - block: Vec>, - }, - CallFunction { - function_name: String, - arguments: Vec, - }, - IfElse { - condition: Expression, - if_block: Option>>, - else_block: Option>>, - }, - Block(Vec>), - InlineBrainfuck { - location_specifier: LocationSpecifier, - clobbered_variables: Vec, - operations: Vec>, - }, -} - -// extended brainfuck opcodes to include mastermind code blocks -#[derive(Debug, Clone)] -#[cfg_attr(test, derive(PartialEq))] -pub enum ExtendedOpcode { - Opcode(OC), - Block(Vec>), -} - -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -/// the type of a variable according to the user, not validated yet as the parser does not keep track of types -pub enum VariableTypeReference { - Cell, - Struct(String), - Array(Box, usize), -} - 
-#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub enum LocationSpecifier { - None, - Cell(TC), - Variable(VariableTarget), -} -impl LocationSpecifier { - fn is_none(&self) -> bool { - matches!(self, LocationSpecifier::None) - } -} - -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct VariableDefinition { - pub name: String, - pub var_type: VariableTypeReference, - pub location_specifier: LocationSpecifier, - // Infinite {name: String, pattern: ???}, -} - -#[derive(Debug, Clone, PartialEq)] -pub struct StructFieldDefinition { - pub name: String, - pub field_type: VariableTypeReference, - pub location_offset_specifier: Option, -} -// let non_neg_location_specifier = match &var_def.location_specifier { -// LocationSpecifier::None => None, -// LocationSpecifier::Cell(l) => { -// // assert the y coordinate is 0 -// // r_assert!( -// // l.1 == 0, -// // "Struct field location specifiers do not support 2D grid cells: {var_def}" -// // ); -// r_assert!( -// l.0 >= 0, -// "Struct field location specifiers must be non-negative: {var_def}" -// ); -// Some(l.0 as usize) -// } -// LocationSpecifier::Variable(_) => { -// r_panic!( "Location specifiers in struct definitions must be relative, not variables: {var_def}") -// } -// }; -impl TryInto for VariableDefinition -where - TC: TapeCellLocation, -{ - type Error = String; - - fn try_into(self) -> Result { - let location_offset_specifier = match &self.location_specifier { - LocationSpecifier::None => None, - LocationSpecifier::Cell(cell) => Some(match cell.to_positive_cell_offset() { - Ok(offset) => offset, - Err(err) => r_panic!("Cannot create struct field \"{self}\". {err}"), - }), - LocationSpecifier::Variable(_) => r_panic!( - "Location specifiers in struct definitions \ -must be relative, not variable." 
- ), - }; - Ok(StructFieldDefinition { - name: self.name, - field_type: self.var_type, - location_offset_specifier, - }) - } -} - -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub enum Reference { - NamedField(String), - Index(usize), -} - -/// Represents a list of subfield references after the `.` or `[x]` operators, e.g. `obj.h[6]` would have `['h', '[6]']` -// a bit verbose, not quite sure about this -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct VariableTargetReferenceChain(pub Vec); -/// Represents a target variable in an expression, this has no type informatino -#[derive(Debug, Clone, Hash, PartialEq, Eq)] -pub struct VariableTarget { - pub name: String, - pub subfields: Option, - pub is_spread: bool, -} -impl VariableTarget { - /// convert a definition to a target for use with definition clauses (as opposed to declarations) - pub fn from_definition(var_def: &VariableDefinition) -> Self { - VariableTarget { - name: var_def.name.clone(), - subfields: None, - is_spread: false, - } - } -} - -impl Display for VariableTypeReference { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match &self { - VariableTypeReference::Cell => f.write_str("cell"), - VariableTypeReference::Struct(struct_name) => { - f.write_fmt(format_args!("struct {struct_name}")) - } - VariableTypeReference::Array(element_type, len) => { - f.write_fmt(format_args!("{element_type}[{len}]")) - } - } - } -} - -impl Display for VariableDefinition { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(&format!("{} {}", self.var_type, self.name))?; - match &self.location_specifier { - LocationSpecifier::Cell(_) | LocationSpecifier::Variable(_) => { - f.write_str(&format!(" {}", self.location_specifier))? 
- } - LocationSpecifier::None => (), - } - - Ok(()) - } -} - -impl Display for LocationSpecifier { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("@")?; - match self { - LocationSpecifier::Cell(cell) => f.write_str(&format!("{cell}"))?, - LocationSpecifier::Variable(var) => f.write_str(&format!("{var}"))?, - LocationSpecifier::None => (), - } - - Ok(()) - } -} - -impl Display for Reference { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Reference::NamedField(subfield_name) => f.write_str(&format!(".{subfield_name}"))?, - Reference::Index(index) => f.write_str(&format!("[{index}]"))?, - } - - Ok(()) - } -} - -impl Display for VariableTarget { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if self.is_spread { - f.write_str("*")?; - } - f.write_str(&self.name)?; - if let Some(subfield_refs) = &self.subfields { - for ref_step in subfield_refs.0.iter() { - f.write_str(&format!("{ref_step}"))?; - } - } - - Ok(()) - } -} - -#[cfg(test)] -mod parser_tests { - use crate::backend::{bf::TapeCell, bf2d::Opcode2D}; - - use super::*; - - #[test] - fn parse_if_1() { - assert!(parse::(&[ - // if true {{}} - Token::If, - Token::True, - Token::OpenBrace, - Token::OpenBrace, - Token::ClosingBrace, - Token::ClosingBrace, - ]) - .unwrap() - .iter() - .eq(&[Clause::IfElse { - condition: Expression::NaturalNumber(1), - if_block: Some(vec![Clause::::Block(vec![])]), - else_block: None, - }])); - } - - #[test] - fn end_tokens_1() { - let _ = parse::(&[Token::Clobbers]).expect_err(""); - } - - #[test] - fn end_tokens_2() { - let _ = parse::(&[Token::Semicolon]).unwrap(); - let _ = parse::(&[Token::Semicolon, Token::Semicolon]).unwrap(); - let _ = - parse::(&[Token::Semicolon, Token::Semicolon, Token::Semicolon]) - .unwrap(); - } - - #[test] - fn end_tokens_3() { - let _ = parse::(&[Token::Cell, Token::Semicolon]).expect_err(""); - } - - #[test] - fn while_condition_1() { - 
assert!(parse::(&[ - Token::While, - Token::Name(String::from("x")), - Token::OpenBrace, - Token::OpenBrace, - Token::ClosingBrace, - Token::ClosingBrace, - ]) - .unwrap() - .iter() - .eq(&[Clause::WhileLoop { - var: VariableTarget { - name: String::from("x"), - subfields: None, - is_spread: false - }, - block: vec![Clause::Block(vec![])] - }])) - } - - #[test] - fn two_dimensional_1() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("x")), - Token::At, - Token::OpenParenthesis, - Token::Digits(String::from("0")), - Token::Comma, - Token::Digits(String::from("1")), - Token::ClosingParenthesis, - Token::Semicolon, - ]) - .unwrap_err() - .contains("Invalid location specifier")); - } - - #[test] - fn two_dimensional_2() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("x")), - Token::At, - Token::OpenParenthesis, - Token::Digits(String::from("0")), - Token::Comma, - Token::Digits(String::from("1")), - Token::ClosingParenthesis, - Token::Semicolon, - ]) - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { - var: VariableDefinition { - name: String::from("x"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::Cell(TapeCell2D(0, 1)) - } - }])); - } - - #[test] - fn two_dimensional_3() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("xyz")), - Token::At, - Token::OpenParenthesis, - Token::Minus, - Token::Digits(String::from("10")), - Token::Comma, - Token::Minus, - Token::Digits(String::from("101")), - Token::ClosingParenthesis, - Token::Semicolon, - ]) - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { - var: VariableDefinition { - name: String::from("xyz"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::Cell(TapeCell2D(-10, -101)) - } - }])); - } - - #[test] - fn var_v() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("v")), - Token::Semicolon - ]) - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { - var: VariableDefinition { 
- name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }])) - } - - #[test] - fn inline_bf_1() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("v")), - Token::Semicolon, - Token::Bf, - Token::OpenBrace, - Token::Plus, - Token::OpenBrace, - Token::Cell, - Token::Name(String::from("v")), - Token::Semicolon, - Token::ClosingBrace, - Token::Minus, - Token::ClosingBrace - ]) - .unwrap() - .iter() - .eq(&[ - Clause::DeclareVariable { - var: VariableDefinition { - name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }, - Clause::InlineBrainfuck { - location_specifier: LocationSpecifier::None, - clobbered_variables: vec![], - operations: vec![ - ExtendedOpcode::Opcode(Opcode2D::Add), - ExtendedOpcode::Block(vec![Clause::DeclareVariable { - var: VariableDefinition { - name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }]), - ExtendedOpcode::Opcode(Opcode2D::Subtract), - ] - } - ])) - } - - // TODO: make context-based parser for brainfuck and refactor these tests - #[test] - fn inline_bf_2() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("v")), - Token::Semicolon, - Token::Bf, - Token::OpenBrace, - Token::Name(String::from("v")), - Token::OpenBrace, - Token::Cell, - Token::Name(String::from("v")), - Token::Semicolon, - Token::ClosingBrace, - Token::Caret, - Token::ClosingBrace - ]) - .unwrap() - .iter() - .eq(&[ - Clause::DeclareVariable { - var: VariableDefinition { - name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }, - Clause::InlineBrainfuck { - location_specifier: LocationSpecifier::None, - clobbered_variables: vec![], - operations: vec![ - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Block(vec![Clause::DeclareVariable { - var: VariableDefinition { - name: 
String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }]), - ExtendedOpcode::Opcode(Opcode2D::Up), - ] - } - ])) - } - - #[test] - fn inline_bf_3() { - assert!(parse::(&[ - Token::Bf, - Token::OpenBrace, - Token::Name(String::from("vvvv")), - Token::MoreThan, - Token::ClosingBrace - ]) - .unwrap() - .iter() - .eq(&[Clause::InlineBrainfuck { - location_specifier: LocationSpecifier::None, - clobbered_variables: vec![], - operations: vec![ - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Opcode(Opcode2D::Right), - ] - }])) - } -} diff --git a/compiler/src/parser/expressions.rs b/compiler/src/parser/expressions.rs new file mode 100644 index 0000000..39984f3 --- /dev/null +++ b/compiler/src/parser/expressions.rs @@ -0,0 +1,285 @@ +use super::types::VariableTarget; + +// TODO: add multiplication +// yes, but no variable * variable multiplication or division +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Expression { + SumExpression { + sign: Sign, + summands: Vec, + }, + NaturalNumber(usize), + VariableReference(VariableTarget), + ArrayLiteral(Vec), + StringLiteral(String), +} + +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Sign { + Positive, + Negative, +} +impl Sign { + fn flipped(self) -> Sign { + match self { + Sign::Positive => Sign::Negative, + Sign::Negative => Sign::Positive, + } + } +} + +impl Expression { + // Iterators? 
+ // TODO: support post/pre increment in expressions + pub fn parse(tokens: &[Token]) -> Result { + let mut i = 0usize; + + if let Token::String(s) = &tokens[i] { + i += 1; + r_assert!( + i == tokens.len(), + "Expected semicolon after string literal {tokens:#?}" + ); + return Ok(Expression::StringLiteral(s.clone())); + } + + if let Token::OpenSquareBracket = &tokens[i] { + let braced_tokens = get_braced_tokens(&tokens[i..], SQUARE_BRACKETS)?; + i += 2 + braced_tokens.len(); + r_assert!( + i == tokens.len(), + "Expected semicolon after array literal {tokens:#?}" + ); + // parse the array + let results: Result, String> = braced_tokens + .split(|t| if let Token::Comma = t { true } else { false }) + .map(Self::parse) + .collect(); + // TODO: why do I need to split collect result into a seperate variable like here? + return Ok(Expression::ArrayLiteral(results?)); + } + + let mut current_sign = Some(Sign::Positive); // by default the first summand is positive + let mut summands = Vec::new(); + while i < tokens.len() { + match (¤t_sign, &tokens[i]) { + (None, Token::Plus) => { + current_sign = Some(Sign::Positive); + i += 1; + } + (None, Token::Minus) => { + current_sign = Some(Sign::Negative); + i += 1; + } + (Some(Sign::Positive), Token::Minus) => { + current_sign = Some(Sign::Negative); + i += 1; + } + (Some(Sign::Negative), Token::Minus) => { + current_sign = Some(Sign::Positive); + i += 1; + } + (Some(sign), Token::Digits(literal)) => { + let parsed_int: usize = literal.parse().unwrap(); + i += 1; + match sign { + Sign::Positive => summands.push(Expression::NaturalNumber(parsed_int)), + Sign::Negative => summands.push(Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(parsed_int)], + }), + } + current_sign = None; + } + (Some(sign), token @ (Token::True | Token::False)) => { + let parsed_int = match token { + Token::True => 1, + Token::False | _ => 0, + }; + i += 1; + summands.push(match sign { + Sign::Positive => 
Expression::NaturalNumber(parsed_int), + Sign::Negative => Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(parsed_int)], + }, + }); + current_sign = None; + } + (Some(sign), Token::Character(chr)) => { + let chr_int: usize = *chr as usize; + + r_assert!( + chr_int < 0xff, + "Character tokens must be single-byte: {chr}" + ); + + i += 1; + summands.push(match sign { + Sign::Positive => Expression::NaturalNumber(chr_int), + Sign::Negative => Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(chr_int)], + }, + }); + current_sign = None; + } + (Some(sign), Token::Name(_) | Token::Asterisk) => { + let (var, len) = parse_var_target(&tokens[i..])?; + i += len; + summands.push(match sign { + Sign::Positive => Expression::VariableReference(var), + Sign::Negative => Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::VariableReference(var)], + }, + }); + current_sign = None; + } + (Some(sign), Token::OpenParenthesis) => { + let braced_tokens = get_braced_tokens(&tokens[i..], PARENTHESES)?; + i += 2 + braced_tokens.len(); + let braced_expr = Self::parse(braced_tokens)?; + // probably inefficent but everything needs to be flattened at some point anyway so won't matter + // TODO: make expression structure more efficient (don't use vectors every time there is a negative) + summands.push(match (sign, braced_expr.clone()) { + ( + Sign::Negative, + Expression::NaturalNumber(_) | Expression::VariableReference(_), + ) => Expression::SumExpression { + sign: Sign::Negative, + summands: vec![braced_expr], + }, + ( + Sign::Negative, + Expression::SumExpression { + sign: Sign::Negative, + summands, + }, + ) => Expression::SumExpression { + sign: Sign::Positive, + summands, + }, + ( + Sign::Negative, + Expression::SumExpression { + sign: Sign::Positive, + summands, + }, + ) => Expression::SumExpression { + sign: Sign::Negative, + summands, + }, + _ => braced_expr, + }); + 
current_sign = None; + } + token => { + r_panic!("Unexpected token {token:#?} found in expression: {tokens:#?}"); + } + } + } + + Ok(match summands.len() { + 1 => summands.into_iter().next().unwrap(), + 1.. => Expression::SumExpression { + sign: Sign::Positive, + summands, + }, + _ => r_panic!("Expected value in expression: {tokens:#?}"), + }) + } + + /// flip the sign of an expression, equivalent to `x => -(x)` + pub fn flipped_sign(self) -> Result { + Ok(match self { + Expression::SumExpression { sign, summands } => Expression::SumExpression { + sign: sign.flipped(), + summands, + }, + Expression::NaturalNumber(_) | Expression::VariableReference(_) => { + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![self], + } + } + Expression::ArrayLiteral(_) | Expression::StringLiteral(_) => { + r_panic!( + "Attempted to invert sign of array or string literal, \ + do not use += or -= on arrays or strings." + ); + } + }) + } + + // not sure if this is the compiler's concern or if it should be the parser + // (constant to add, variables to add, variables to subtract) + // currently multiplication is not supported so order of operations and flattening is very trivial + // If we add multiplication in future it will likely be constant multiplication only, so no variable on variable multiplication + pub fn flatten(&self) -> Result<(u8, Vec, Vec), String> { + let expr = self; + let mut imm_sum = Wrapping(0u8); + let mut additions = Vec::new(); + let mut subtractions = Vec::new(); + + match expr { + Expression::SumExpression { sign, summands } => { + let results: Result, Vec)>, String> = + summands.into_iter().map(|expr| expr.flatten()).collect(); + let flattened = results? 
+ .into_iter() + .reduce(|acc, (imm, adds, subs)| { + ( + (Wrapping(acc.0) + Wrapping(imm)).0, + [acc.1, adds].concat(), + [acc.2, subs].concat(), + ) + }) + .unwrap_or((0, vec![], vec![])); + + match sign { + Sign::Positive => { + imm_sum += flattened.0; + additions.extend(flattened.1); + subtractions.extend(flattened.2); + } + Sign::Negative => { + imm_sum -= flattened.0; + subtractions.extend(flattened.1); + additions.extend(flattened.2); + } + }; + } + Expression::NaturalNumber(number) => { + imm_sum += Wrapping(*number as u8); + } + Expression::VariableReference(var) => { + additions.push(var.clone()); + } + Expression::ArrayLiteral(_) | Expression::StringLiteral(_) => { + r_panic!("Attempt to flatten an array-like expression: {expr:#?}"); + } + } + + Ok((imm_sum.0, additions, subtractions)) + } + + //Recursively Check If This Is Self Referencing + pub fn check_self_referencing(&self, parent: &VariableTarget) -> bool { + // TODO: make sure nested values work correctly + match self { + Expression::SumExpression { + sign: _sign, + summands, + } => summands + .iter() + .any(|summand| summand.check_self_referencing(parent)), + Expression::VariableReference(var) => *var == *parent, + Expression::ArrayLiteral(_) + | Expression::StringLiteral(_) + | Expression::NaturalNumber(_) => false, + } + } +} diff --git a/compiler/src/parser/mod.rs b/compiler/src/parser/mod.rs new file mode 100644 index 0000000..71f2aaf --- /dev/null +++ b/compiler/src/parser/mod.rs @@ -0,0 +1,5 @@ +mod expressions; +mod parser; +mod tests; +mod tokeniser; +pub mod types; diff --git a/compiler/src/parser/old_parser.rs b/compiler/src/parser/old_parser.rs new file mode 100644 index 0000000..814fed5 --- /dev/null +++ b/compiler/src/parser/old_parser.rs @@ -0,0 +1,607 @@ +// project dependencies: +use crate::{ + backend::{bf::TapeCell, bf2d::TapeCell2D, common::OpcodeVariant}, + macros::macros::{r_assert, r_panic}, + tokeniser::Token, +}; + +// stdlib dependencies +use std::{fmt::Display, 
mem::discriminant, num::Wrapping}; + +/// recursive function to create a tree representation of the program +pub fn parse_clause_from_tokens( + tokens: &[Token], + blocks: Vec>>, +) -> Result>, String> { + Ok(match (&tokens[0], &tokens.get(1), &tokens.get(2)) { + (Token::Cell, _, _) + | (Token::Struct, Some(Token::Name(_)), Some(Token::Name(_) | Token::OpenSquareBracket)) => { + Some(parse_let_clause(tokens)?) + } + (Token::Struct, Some(Token::Name(_)), Some(Token::OpenBrace)) => { + Some(parse_struct_clause(tokens)?) + } + (Token::Plus, Some(Token::Plus), _) | (Token::Minus, Some(Token::Minus), _) => { + Some(parse_increment_clause(tokens)?) + } + (Token::Name(_), Some(Token::EqualsSign | Token::Dot | Token::OpenSquareBracket), _) => { + Some(parse_set_clause(clause_tokens)?) + } + (Token::Drain, _, _) => Some(parse_drain_copy_clause( + tokens, + true, + blocks + .get(0) + .ok_or(format!("Expected code block in drain clause: {tokens:#?}"))?, + )?), + (Token::Copy, _, _) => { + clauses.push(parse_drain_copy_clause(clause_tokens, false)?); + } + (Token::While, _, _) => { + clauses.push(parse_while_clause(clause_tokens)?); + } + (Token::Output, _, _) => { + clauses.push(parse_output_clause(clause_tokens)?); + } + (Token::Input, _, _) => { + clauses.push(parse_input_clause(clause_tokens)?); + } + (Token::Name(_), Some(Token::OpenParenthesis), _) => { + clauses.push(parse_function_call_clause(clause_tokens)?); + } + (Token::Fn, _, _) => { + clauses.push(parse_function_definition_clause(clause_tokens)?); + } + (Token::Name(_), Token::Plus | Token::Minus, Token::EqualsSign) => { + clauses.extend(parse_add_clause(clause_tokens)?); + } + (Token::If, _, _) => { + clauses.push(parse_if_else_clause(clause_tokens)?); + } + (Token::OpenBrace, _, _) => { + let braced_tokens = get_braced_tokens(clause_tokens, BRACES)?; + let inner_clauses = parse(braced_tokens)?; + clauses.push(Clause::Block(inner_clauses)); + } + (Token::Assert, _, _) => Some(parse_assert_clause(tokens)?), + // 
empty clause + (Token::Semicolon, _, _) => None, + // the None token usually represents whitespace, it should be filtered out before reaching this function + // Wrote out all of these possibilities so that the compiler will tell me when I haven't implemented a token + ( + Token::Else + | Token::Not + | Token::ClosingBrace + | Token::OpenSquareBracket + | Token::ClosingSquareBracket + | Token::OpenParenthesis + | Token::ClosingParenthesis + | Token::Comma + | Token::Plus + | Token::Minus + | Token::Into + | Token::Digits(_) + | Token::Name(_) + | Token::String(_) + | Token::Character(_) + | Token::True + | Token::False + | Token::EqualsSign + | Token::Asterisk + | Token::Clobbers + | Token::Equals + | Token::Unknown + | Token::Dot + | Token::At + | Token::Struct, + _, + _, + ) => r_panic!("Invalid clause: {tokens:#?}"), + }) +} + +fn parse_add_clause(clause: &[Token]) -> Result>, String> { + let mut clauses = Vec::new(); + let mut i = 0usize; + let (var, len) = parse_var_target(&clause[i..])?; + i += len; + + let positive = match &clause[i] { + Token::Plus => true, + Token::Minus => false, + _ => { + r_panic!("Unexpected second token in add clause: {clause:#?}"); + } + }; + i += 2; // assume the equals sign is there because it was checked by the main loop + let raw_expr = Expression::parse(&clause[i..(clause.len() - 1)])?; + let expr = match positive { + true => raw_expr, + false => Expression::SumExpression { + sign: Sign::Negative, + summands: vec![raw_expr], + }, + }; + //Check if this add clause self references + let self_referencing = expr.check_self_referencing(&var); + + clauses.push(Clause::AddToVariable { + var, + value: expr, + self_referencing: self_referencing, + }); + + Ok(clauses) +} + +// currently just syntax sugar, should make it actually do post/pre increments +fn parse_increment_clause(clause: &[Token]) -> Result, String> { + let (var, _) = parse_var_target(&clause[2..])?; + //An increment clause can never be self referencing since it just VAR++ + 
Ok(match (&clause[0], &clause[1]) { + (Token::Plus, Token::Plus) => Clause::AddToVariable { + var, + value: Expression::NaturalNumber(1), + self_referencing: false, + }, + (Token::Minus, Token::Minus) => Clause::AddToVariable { + var, + value: Expression::NaturalNumber((-1i8 as u8) as usize), + self_referencing: false, + }, + _ => { + r_panic!("Invalid pattern in increment clause: {clause:#?}"); + } + }) + // assumed that the final token is a semicolon +} + +fn parse_set_clause(clause: &[Token]) -> Result, String> { + let mut i = 0usize; + let (var, len) = parse_var_target(&clause[i..])?; + i += len; + + Ok(match &clause[i] { + Token::EqualsSign => { + i += 1; + let expr = Expression::parse(&clause[i..(clause.len() - 1)])?; + let self_referencing = expr.check_self_referencing(&var); + Clause::SetVariable { + var, + value: expr, + self_referencing, + } + } + Token::Plus | Token::Minus => { + let is_add = if let Token::Plus = &clause[i] { + true + } else { + false + }; + i += 1; + let Token::EqualsSign = &clause[i] else { + r_panic!("Expected equals sign in add-assign operator: {clause:#?}"); + }; + i += 1; + + let mut expr = Expression::parse(&clause[i..(clause.len() - 1)])?; + if !is_add { + expr = expr.flipped_sign()?; + } + + let self_referencing = expr.check_self_referencing(&var); + Clause::AddToVariable { + var, + value: expr, + self_referencing, + } + } + _ => r_panic!("Expected assignment operator in set clause: {clause:#?}"), + }) +} + +fn parse_drain_copy_clause( + clause: &[Token], + is_draining: bool, + block: Vec>, +) -> Result, String> { + // drain g {i += 1;}; + // drain g into j; + // copy foo into bar {g += 2; etc;}; + // TODO: make a tuple-parsing function and use it here instead of a space seperated list of targets + + let mut targets = Vec::new(); + let mut i = 1usize; + + let condition_start_token = i; + + i += 1; + while let Some(token) = clause.get(i) { + if let Token::Into | Token::OpenBrace | Token::Semicolon = token { + break; + } + i += 1; 
+ } + r_assert!( + i < clause.len(), + "Expected source expression in draining/copying loop: {clause:#?}" + ); + + let source = Expression::parse(&clause[condition_start_token..i])?; + + if let Token::Into = &clause[i] { + // simple drain/copy move operations + i += 1; + + loop { + match &clause[i] { + Token::Name(_) | Token::Asterisk => { + let (var, len) = parse_var_target(&clause[i..])?; + i += len; + targets.push(var); + } + Token::OpenBrace | Token::Semicolon => { + break; + } + _ => { + r_panic!("Unexpected token in drain clause: {clause:#?}"); + } + } + } + } + + // TODO: fix ordering of blocks in new parser, may have to rewrite the whole parser to use &[char] + // if let Token::OpenBrace = &clause[i] { + // // code block to execute at each loop iteration + // let braced_tokens = get_braced_tokens(&clause[i..], BRACES)?; + // // recursion + // block.extend(parse(braced_tokens)?); + // // i += 2 + braced_tokens.len(); + // } + + Ok(Clause::CopyLoop { + source, + targets, + block, + is_draining, + }) +} + +fn parse_if_else_clause( + clause: &[Token], +) -> Result, String> { + // skip first token, assumed to start with if + let mut i = 1usize; + let mut not = false; + if let Token::Not = &clause[i] { + not = true; + i += 1; + } + + let condition_start_token = i; + + i += 1; + while let Some(token) = clause.get(i) { + if let Token::OpenBrace = token { + break; + } + i += 1; + } + r_assert!( + i < clause.len(), + "Expected condition and block in if statement: {clause:#?}" + ); + + let condition = Expression::parse(&clause[condition_start_token..i])?; + + let block_one = { + let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; + i += 2 + block_tokens.len(); + parse(block_tokens)? + }; + + let block_two = if let Some(Token::Else) = &clause.get(i) { + i += 1; + let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; + // i += 2 + block_tokens.len(); + Some(parse(block_tokens)?) 
+ } else { + None + }; + + Ok(match (not, block_one, block_two) { + (false, block_one, block_two) => Clause::IfElse { + condition, + if_block: Some(block_one), + else_block: block_two, + }, + (true, block_one, block_two) => Clause::IfElse { + condition, + if_block: block_two, + else_block: Some(block_one), + }, + }) +} + +fn parse_output_clause(clause: &[Token]) -> Result, String> { + let mut i = 1usize; + + let expr_tokens = &clause[i..(clause.len() - 1)]; + let expr = Expression::parse(expr_tokens)?; + i += expr_tokens.len(); + + let Token::Semicolon = &clause[i] else { + r_panic!("Invalid token at end of output clause: {clause:#?}"); + }; + + Ok(Clause::OutputValue { value: expr }) +} + +fn parse_input_clause(clause: &[Token]) -> Result, String> { + let mut i = 1usize; + + let (var, len) = parse_var_target(&clause[i..])?; + i += len; + + let Token::Semicolon = &clause[i] else { + r_panic!("Invalid token at end of input clause: {clause:#?}"); + }; + + Ok(Clause::InputVariable { var }) +} + +fn parse_assert_clause(clause: &[Token]) -> Result, String> { + let mut i = 1usize; + + let (var, len) = parse_var_target(&clause[i..])?; + i += len; + + if let Token::Unknown = &clause[i] { + Ok(Clause::AssertVariableValue { var, value: None }) + } else { + let Token::Equals = &clause[i] else { + r_panic!("Expected assertion value in assert clause: {clause:#?}"); + }; + i += 1; + + let Token::Semicolon = &clause[clause.len() - 1] else { + r_panic!("Invalid token at end of assert clause: {clause:#?}"); + }; + + let remaining = &clause[i..(clause.len() - 1)]; + let expr = Expression::parse(remaining)?; + + Ok(Clause::AssertVariableValue { + var, + value: Some(expr), + }) + } +} + +fn parse_brainfuck_clause( + clause: &[Token], +) -> Result, String> { + // bf {++--<><} + // bf @3 {++--<><} + // bf clobbers var1 var2 {++--<><} + // bf @2 clobbers *arr {++--<><} + + let mut clobbers = Vec::new(); + let mut i = 1usize; + + // check for location specifier + let (mem_offset, len) = 
TC::parse_location_specifier(&clause[i..])?; + i += len; + + if let Token::Clobbers = &clause[i] { + i += 1; + + loop { + match &clause[i] { + Token::Name(_) | Token::Asterisk => { + let (var, len) = parse_var_target(&clause[i..])?; + i += len; + clobbers.push(var); + } + Token::OpenBrace => { + break; + } + _ => { + r_panic!("Unexpected token in drain clause: {clause:#?}"); + } + } + } + } + + let bf_tokens = get_braced_tokens(&clause[i..], BRACES)?; + let mut ops = Vec::new(); + let mut j = 0; + while j < bf_tokens.len() { + match &bf_tokens[j] { + Token::OpenBrace => { + // embedded mastermind + let block_tokens = get_braced_tokens(&bf_tokens[j..], BRACES)?; + let clauses = parse(block_tokens)?; + ops.push(ExtendedOpcode::Block(clauses)); + j += block_tokens.len() + 2; + } + token @ _ => { + ops.push(ExtendedOpcode::Opcode(OC::from_token(token)?)); + j += 1; + } + } + } + + Ok(Clause::InlineBrainfuck { + location_specifier: mem_offset, + clobbered_variables: clobbers, + operations: ops, + }) +} + +fn parse_function_definition_clause( + clause: &[Token], +) -> Result, String> { + let mut i = 1usize; + // function name + let Token::Name(name) = &clause[i] else { + r_panic!("Expected function name after in function definition clause: {clause:#?}"); + }; + let mut args = Vec::new(); + i += 1; + let Token::OpenParenthesis = &clause[i] else { + r_panic!("Expected argument list in function definition clause: {clause:#?}"); + }; + let arg_tokens = get_braced_tokens(&clause[i..], PARENTHESES)?; + let mut j = 0usize; + // parse function argument names + while j < arg_tokens.len() { + // break if no more arguments + let (Token::Cell | Token::Struct) = &arg_tokens[j] else { + break; + }; + let (var, len) = parse_var_definition(&arg_tokens[j..], false)?; + j += len; + + args.push(var); + + if j >= arg_tokens.len() { + break; + } else if let Token::Comma = &arg_tokens[j] { + j += 1; + } else { + r_panic!("Unexpected token in function definition arguments: {arg_tokens:#?}"); + 
} + } + + i += 2 + arg_tokens.len(); + + // recursively parse the inner block + let Token::OpenBrace = &clause[i] else { + r_panic!("Expected execution block in function definition: {clause:#?}"); + }; + + let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; + let parsed_block = parse(block_tokens)?; + + Ok(Clause::DefineFunction { + name: name.clone(), + arguments: args, + block: parsed_block, + }) +} + +fn parse_function_call_clause(clause: &[Token]) -> Result, String> { + let mut i = 0usize; + // Okay I didn't know this rust syntax, could have used it all over the place + let Token::Name(name) = &clause[i] else { + r_panic!("Expected function identifier at start of function call clause: {clause:#?}"); + }; + let mut args = Vec::new(); + i += 1; + + let Token::OpenParenthesis = &clause[i] else { + r_panic!("Expected argument list in function call clause: {clause:#?}"); + }; + let arg_tokens = get_braced_tokens(&clause[i..], PARENTHESES)?; + + let mut j = 0usize; + while j < arg_tokens.len() { + // this used to be in the while condition but moved it here to check for the case of no arguments + let Token::Name(_) = &arg_tokens[j] else { + break; + }; + let (var, len) = parse_var_target(&arg_tokens[j..])?; + j += len; + + args.push(var); + + if j >= arg_tokens.len() { + break; + } else if let Token::Comma = &arg_tokens[j] { + j += 1; + } else { + r_panic!("Unexpected token in function call arguments: {arg_tokens:#?}"); + } + } + + i += 2 + arg_tokens.len(); + + let Token::Semicolon = &clause[i] else { + r_panic!("Expected clause delimiter at end of function call clause: {clause:#?}"); + }; + + Ok(Clause::CallFunction { + function_name: name.clone(), + arguments: args, + }) +} + +/// get a clause's tokens, typically a line, bounded by ; +fn get_clause_tokens(tokens: &[Token]) -> Result, String> { + if tokens.len() == 0 { + Ok(None) + } else { + let mut i = 0usize; + while i < tokens.len() { + match tokens[i] { + Token::OpenBrace => { + let braced_block = 
get_braced_tokens(&tokens[i..], BRACES)?; + i += 2 + braced_block.len(); + // handle blocks marking the end of clauses, if/else being the exception + if i < tokens.len() { + if let Token::Else = tokens[i] { + i += 1; + let else_block = get_braced_tokens(&tokens[i..], BRACES)?; + i += 2 + else_block.len(); + } + } + return Ok(Some(&tokens[..i])); + } + Token::Semicolon => { + i += 1; + return Ok(Some(&tokens[..i])); + } + _ => { + i += 1; + } + } + } + + r_panic!("No clause could be found in: {tokens:#?}"); + } +} + +const SQUARE_BRACKETS: (Token, Token) = (Token::OpenSquareBracket, Token::ClosingSquareBracket); +const BRACES: (Token, Token) = (Token::OpenBrace, Token::ClosingBrace); +const PARENTHESES: (Token, Token) = (Token::OpenParenthesis, Token::ClosingParenthesis); +const ANGLED_BRACKETS: (Token, Token) = (Token::LessThan, Token::MoreThan); +// this should be a generic function but rust doesn't support enum variants as type arguments yet +// find tokens bounded by matching brackets +// TODO: make an impl for &[Token] and put all these functions in it +fn get_braced_tokens(tokens: &[Token], braces: (Token, Token)) -> Result<&[Token], String> { + let (open_brace, closing_brace) = (discriminant(&braces.0), discriminant(&braces.1)); + // find corresponding bracket, the depth check is unnecessary but whatever + let len = { + let mut i = 1usize; + let mut depth = 1; + while i < tokens.len() && depth > 0 { + let g = discriminant(&tokens[i]); + if g == open_brace { + depth += 1; + } else if g == closing_brace { + depth -= 1; + } + i += 1; + } + i + }; + + if len >= 2 { + if open_brace == discriminant(&tokens[0]) && closing_brace == discriminant(&tokens[len - 1]) + { + return Ok(&tokens[1..(len - 1)]); + } + } + r_panic!("Invalid braced tokens: {tokens:#?}"); +} diff --git a/compiler/src/tokeniser.rs b/compiler/src/parser/old_tokeniser.rs similarity index 69% rename from compiler/src/tokeniser.rs rename to compiler/src/parser/old_tokeniser.rs index dae5c3a..45f8bdc 
100644 --- a/compiler/src/tokeniser.rs +++ b/compiler/src/parser/old_tokeniser.rs @@ -16,7 +16,7 @@ pub fn tokenise(source: &str) -> Result, String> { // mappings are a list of key * value tuples because we are doing "starts with" searches, // meaning we can't look up in a hashtable let mappings = [ - (" ", Token::None), + // (" ", Token::None), (";", Token::Semicolon), ("output", Token::Output), ("input", Token::Input), @@ -189,133 +189,3 @@ fn tokenise_raw_string_literal(raw: &str) -> Result { } Ok(built_string) } - -#[derive(Debug, Clone, PartialEq)] -pub enum Token { - None, - Output, - Input, - Fn, - Cell, - Struct, - While, - If, - Not, - Else, - OpenBrace, - ClosingBrace, - OpenSquareBracket, - ClosingSquareBracket, - OpenParenthesis, - ClosingParenthesis, - LessThan, - MoreThan, - Comma, - Dot, - Asterisk, - At, - Copy, - Drain, - Into, - Bf, - Clobbers, - Assert, - Equals, - Unknown, - Name(String), - Digits(String), - String(String), - Character(char), - True, - False, - Minus, - Plus, - EqualsSign, - Semicolon, - Caret, -} - -#[cfg(test)] -mod tokeniser_tests { - use crate::tokeniser::{tokenise, Token}; - - fn _tokenisation_test(input_str: &str, desired_output: &[Token]) { - let input_string = String::from(input_str); - let actual_output = tokenise(&input_string).unwrap(); - println!("desired: {desired_output:#?}"); - println!("actual: {actual_output:#?}"); - assert!(actual_output.iter().eq(desired_output)); - } - - #[test] - fn character_literals_1() { - _tokenisation_test( - r#"'a' 'b' 'c' ' '"#, - &[ - Token::Character('a'), - Token::Character('b'), - Token::Character('c'), - Token::Character(' '), - ], - ); - } - - #[test] - fn character_literals_2() { - _tokenisation_test(r#"'\n'"#, &[Token::Character('\n')]); - } - - #[test] - fn character_literals_3() { - _tokenisation_test(r#"'"'"#, &[Token::Character('"')]); - } - - #[test] - fn character_literals_4() { - _tokenisation_test(r#"'\''"#, &[Token::Character('\'')]); - } - - #[test] - #[should_panic] 
- fn character_literals_5() { - _tokenisation_test(r#"'\'"#, &[Token::Character('\\')]); - } - - #[test] - #[should_panic] - fn character_literals_6() { - _tokenisation_test(r#"'aa'"#, &[Token::String(String::from("aa"))]); - } - - #[test] - fn string_literals_1() { - _tokenisation_test("\"hello\"", &[Token::String(String::from("hello"))]); - } - - #[test] - fn string_literals_2() { - _tokenisation_test(r#""""#, &[Token::String(String::from(""))]); - } - - #[test] - fn string_literals_2a() { - _tokenisation_test( - r#""""""#, - &[ - Token::String(String::from("")), - Token::String(String::from("")), - ], - ); - } - - #[test] - fn string_literals_3() { - _tokenisation_test( - r#""\"" " ""#, - &[ - Token::String(String::from("\"")), - Token::String(String::from(" ")), - ], - ); - } -} diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs new file mode 100644 index 0000000..41567a0 --- /dev/null +++ b/compiler/src/parser/parser.rs @@ -0,0 +1,473 @@ +use super::{ + expressions::Expression, + tokeniser::{ + find_and_advance, find_next, find_next_whitespace, next_token, skip_whitespace, Token, + }, + types::{ + Clause, LocationSpecifier, Reference, TapeCellLocation, VariableTarget, + VariableTargetReferenceChain, VariableTypeReference, + }, +}; +use crate::{ + backend::{ + bf::TapeCell, + bf2d::TapeCell2D, + common::{OpcodeVariant, TapeCellVariant}, + }, + macros::macros::{r_assert, r_panic}, + parser::types::VariableTypeDefinition, +}; + +pub fn parse_clause( + chars: &mut &[char], +) -> Result, String> { + // TODO: refactor this? inconsistent function calling in different parsing functions + let next_token = + |s| next_token(s).map_err(|()| format!("Invalid token: {}", chars.iter().collect())); + + let mut s = chars; + Ok(match next_token(s)? 
{ + // None signifies end of input + None => None, + Token::If => parse_if_else_clause(chars)?, + Token::While => parse_while_clause(chars)?, + Token::Fn => parse_function_definition_clause(chars)?, + Token::Struct => { + let Token::Name(_) = next_token(s)? else { + // TODO: add source snippet + r_panic!("Expected identifier after `struct` keyword."); + }; + match next_token(s)? { + Token::OpenBrace => parse_struct_definition_clause(chars)?, + _ => parse_let_clause(chars)?, + } + } + Token::Cell => parse_let_clause(chars)?, + _ => todo!(), + }) +} + +fn parse_block(chars: &mut &[char]) -> Result>, String> { + todo!() +} + +//////////////////////////// +//////////////////////////// +//////////////////////////// + +impl TapeCellLocation for TapeCell { + fn parse_location_specifier( + chars: &mut &[char], + ) -> Result, String> { + let mut s = chars; + let Token::At = next_token(s)? else { + return Ok(LocationSpecifier::None); + }; + *chars = s; + + match next_token(s)? { + Token::Minus | Token::Digits(_) => Ok(LocationSpecifier::Cell(parse_integer(chars)?)), + // variable location specifier: + Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), + // TODO: add source snippet + _ => r_panic!("Invalid location specifier: ",), + } + } + + fn to_positive_cell_offset(&self) -> Result { + r_assert!(*self >= 0, "Expected non-negative cell offset."); + Ok(*self as usize) + } +} + +impl TapeCellLocation for TapeCell2D { + fn parse_location_specifier( + chars: &mut &[char], + ) -> Result, String> { + let mut s = chars; + let Token::At = next_token(s)? else { + return Ok(LocationSpecifier::None); + }; + *chars = s; + + match next_token(s)? 
{ + Token::OpenParenthesis => { + // parse a 2-tuple + let tuple = parse_integer_tuple::<2>(chars)?; + Ok(LocationSpecifier::Cell(TapeCell2D(tuple[0], tuple[1]))) + } + Token::Minus | Token::Digits(_) => Ok(LocationSpecifier::Cell(TapeCell2D( + parse_integer(chars)?, + 0, + ))), + // variable location specifier: + Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), + // TODO: add source snippet + _ => r_panic!("Invalid location specifier: ",), + } + } + + fn to_positive_cell_offset(&self) -> Result { + r_assert!( + self.1 == 0 && self.0 >= 0, + "Expected non-negative 1st dimensional cell offset (i.e. (x,y) where y=0)." + ); + Ok(self.0 as usize) + } +} + +fn parse_var_type_definition( + chars: &mut &[char], +) -> Result, String> { + let mut var_type = match next_token(chars)? { + Token::Cell => VariableTypeReference::Cell, + Token::Struct => { + let Token::Name(struct_name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected struct type name in variable definition."); + }; + + VariableTypeReference::Struct(struct_name) + } + _ => { + // TODO: add source snippet + r_panic!("Unexpected token in variable type definition."); + } + }; + + // parse array specifiers + { + let mut s = chars; + while let Token::OpenSquareBracket = next_token(s)? { + var_type = VariableTypeReference::Array(Box::new(var_type), parse_array_length(chars)?); + s = chars; + } + } + + let Token::Name(name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected name in variable definition."); + }; + + Ok(VariableTypeDefinition { + var_type, + name, + location_specifier: TC::parse_location_specifier(chars)?, + }) +} + +/// parse the subscript of an array variable, e.g. [4] [6] [0] +/// must be compile-time constant +fn parse_subscript(chars: &mut &[char]) -> Result { + let Token::OpenSquareBracket = next_token(chars)? 
else { + // TODO: add program snippet + r_panic!("Expected `[` in array subscript."); + }; + let Token::Digits(digits) = next_token(chars)? else { + // TODO: add program snippet + r_panic!("Expected natural number in array subscript."); + }; + let Token::ClosingSquareBracket = next_token(chars)? else { + // TODO: add program snippet + r_panic!("Expected `]` in array subscript."); + }; + // TODO: fix duplicate error here + digits + .parse::() + .map_err(|_| Err(format!("Expected natural number in array subscript."))) +} + +/// parse_array_subscript but with a length check +fn parse_array_length(chars: &mut &[char]) -> Result { + let len = parse_subscript(chars)?; + // TODO: add source snippet + r_assert!(len > 0, "Array variable cannot be zero-length."); + Ok(len) +} + +fn parse_var_target(chars: &mut &[char]) -> Result { + let is_spread = { + let s = chars; + if let Token::Asterisk = next_token(s)? { + *chars = s; + true + } else { + false + } + }; + + let Token::Name(base_var_name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected identifier in variable target identifier."); + }; + + let mut ref_chain = vec![]; + let mut s = chars; + loop { + match next_token(s)? { + Token::OpenSquareBracket => { + let index = parse_subscript(chars)?; + ref_chain.push(Reference::Index(index)); + } + Token::Dot => { + let Token::Name(subfield_name) = next_token(s)? 
else { + // TODO: add source snippet + r_panic!("Expected subfield name in variable target identifier."); + }; + ref_chain.push(Reference::NamedField(subfield_name)); + } + _ => { + break; + } + } + *chars = s; + } + + Ok(VariableTarget { + name: base_var_name, + subfields: if ref_chain.len() > 0 { + Some(VariableTargetReferenceChain(ref_chain)) + } else { + None + }, + is_spread, + }) +} + +fn parse_integer(chars: &mut &[char]) -> Result { + let mut token = next_token(chars); + let mut is_negative = false; + if let Ok(Token::Minus) = token { + is_negative = true; + token = next_token(chars)?; + } + let Ok(Token::Digits(digits)) = token else { + // TODO: add source snippet + r_panic!("Expected integer.") + }; + digits + .parse::() + .map(|magnitude| match is_negative { + // TODO: truncation error handling + false => magnitude as i32, + true => -(magnitude as i32), + }) + // TODO: fix duplicate error here + .map_err(|_| Err(format!("Expected integer."))) +} + +fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; LENGTH], String> { + let Ok(Token::OpenParenthesis) = next_token(chars) else { + // TODO: add source snippet + r_panic!("Expected opening parenthesis in tuple.") + }; + + let mut tuple = [0; LENGTH]; + for (j, element) in tuple.iter_mut().enumerate() { + *element = parse_integer(chars)?; + + if j < LENGTH - 1 { + let Ok(Token::Comma) = next_token(chars) else { + // TODO: add source snippet + r_panic!("Expected comma in tuple."); + }; + } + } + let Ok(Token::ClosingParenthesis) = next_token(chars) else { + // TODO: add source snippet + r_panic!("Expected closing parenthesis in tuple."); + }; + + Ok(tuple) +} + +//////////////////////////// +//////////////////////////// +//////////////////////////// + +fn parse_if_else_clause( + chars: &mut &[char], +) -> Result, String> { + let Ok(Token::If) = next_token(chars) else { + // TODO: add program snippet + r_panic!("Expected \"if\" in if-else clause."); + }; + + let is_not = { + let s = chars; + if let 
Token::Not = next_token(s)? { + *chars = s; + true + } else { + false + } + }; + + let Ok(condition_char_len) = find_next(chars, '{') else { + // TODO: add program snippet to errors + r_panic!("Expected code block in if-else clause."); + }; + let condition = Expression::parse(&chars[..condition_char_len])?; + *chars = &chars[condition_char_len..]; + + let block_one = parse_block(chars)?; + + let block_two = { + let mut s = chars; + if let Token::Else = next_token(s)? { + *chars = s; + Some(parse_block(chars)?) + } else { + None + } + }; + + Ok(match (is_not, block_one, block_two) { + (false, if_block, None) => Clause::If { + condition, + if_block, + }, + (true, if_not_block, None) => Clause::IfNot { + condition, + if_not_block, + }, + (false, if_block, Some(else_block)) => Clause::IfElse { + condition, + if_block, + else_block, + }, + (true, if_not_block, Some(else_block)) => Clause::IfNotElse { + condition, + if_not_block, + else_block, + }, + }) +} + +fn parse_while_clause( + chars: &mut &[char], +) -> Result, String> { + let Token::While = next_token(chars)? else { + // TODO: add program snippet + r_panic!("Expected \"while\" in while clause."); + }; + + let Ok(condition_char_len) = find_next(chars, '{') else { + // TODO: add program snippet to errors + r_panic!("Expected code block in while clause."); + }; + let condition = Expression::parse(&chars[..condition_char_len])?; + *chars = &chars[condition_char_len..]; + + // TODO: make while loops support expressions + let Expression::VariableReference(condition_variable) = condition else { + r_panic!("While clause expected variable target condition."); + }; + + let loop_block = parse_block(chars)?; + + Ok(Clause::While { + var: condition_variable, + block: loop_block, + }) +} + +fn parse_function_definition_clause( + chars: &mut &[char], +) -> Result, String> { + let Token::Fn = next_token(chars)? 
else { + // TODO: add source snippet + r_panic!("Expected `fn` in function definition clause."); + }; + + let Token::Name(function_name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected name in function definition clause."); + }; + + let Token::OpenParenthesis = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected argument list in function definition clause."); + }; + let mut arguments = vec![]; + loop { + { + let mut s = chars; + if let Token::ClosingParenthesis = next_token(s)? { + *chars = s; + break; + } + } + arguments.push(parse_var_type_definition(chars)?); + + let token = next_token(chars)?; + match token { + Token::ClosingParenthesis => break, + Token::Comma => (), + Some(token) => r_panic!("Unexpected token in function argument list: `{token}`."), + None => r_panic!("Expected token in function argument list."), + } + } + + Ok(Clause::DefineFunction { + name: function_name, + arguments, + block: parse_block(chars)?, + }) +} + +/// Parse tokens representing a struct definition into a clause +fn parse_struct_definition_clause( + chars: &mut &[char], +) -> Result, String> { + let Ok(Token::Struct) = next_token(chars) else { + // TODO: add source snippet + r_panic!("Expected `struct` in struct definition."); + }; + + let Ok(Token::Name(name)) = next_token(chars) else { + // TODO: add source snippet + r_panic!("Expected name in struct definition."); + }; + + let Ok(Token::OpenBrace) = next_token(chars) else { + // TODO: add source snippet + r_panic!("Expected `{{` in struct clause."); + }; + + let mut fields = vec![]; + loop { + let field = parse_var_type_definition(chars)?; + fields.push(field.try_into()?); + let Ok(Token::Semicolon) = next_token(chars) else { + // TODO: add source snippet + r_panic!("Expected semicolon after struct definition field."); + }; + if let Ok(Token::ClosingBrace) = next_token(chars) { + break; + } + } + + Ok(Clause::DefineStruct { name, fields }) +} + +/// parse variable 
declarations and definitions. +/// e.g. `cell x = 0;` or `struct DummyStruct y;` +fn parse_let_clause(chars: &mut &[char]) -> Result, String> { + let var = parse_var_type_definition(chars)?; + + let mut s = chars; + if let Ok(Token::EqualsSign) = next_token(s) { + chars = s; + let expr = Expression::parse(find_and_advance(chars, ';'))?; + let Ok(Token::Semicolon) = next_token(chars) else { + r_panic!("Expected semicolon after variable definition."); + }; + return Ok(Clause::DefineVariable { var, value: expr }); + } + + let Ok(Token::Semicolon) = next_token(chars) else { + r_panic!("Expected semicolon after variable declaration."); + }; + Ok(Clause::DeclareVariable { var }) +} diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs new file mode 100644 index 0000000..447e5c6 --- /dev/null +++ b/compiler/src/parser/tests.rs @@ -0,0 +1,370 @@ +#[cfg(test)] +mod tokeniser_tests { + use super::super::{parser::next_token, types::Token}; + + fn tokenise(input_str: &str) -> Result, String> { + let mut tokens = vec![]; + let chars = input_str.chars().collect::>(); + let mut c = &chars; + while let Some(token) = next_token(&c)? 
{ + tokens.push(token); + } + Ok(tokens) + } + + fn _tokenisation_test(input_str: &str, desired_output: &[Token]) { + let input_string = String::from(input_str); + let actual_output = tokenise(&input_string).unwrap(); + println!("desired: {desired_output:#?}"); + println!("actual: {actual_output:#?}"); + assert!(actual_output.iter().eq(desired_output)); + } + + #[test] + fn character_literals_1() { + _tokenisation_test( + r#"'a' 'b' 'c' ' '"#, + &[ + Token::Character('a'), + Token::Character('b'), + Token::Character('c'), + Token::Character(' '), + ], + ); + } + + #[test] + fn character_literals_2() { + _tokenisation_test(r#"'\n'"#, &[Token::Character('\n')]); + } + + #[test] + fn character_literals_3() { + _tokenisation_test(r#"'"'"#, &[Token::Character('"')]); + } + + #[test] + fn character_literals_4() { + _tokenisation_test(r#"'\''"#, &[Token::Character('\'')]); + } + + #[test] + #[should_panic] + fn character_literals_5() { + _tokenisation_test(r#"'\'"#, &[Token::Character('\\')]); + } + + #[test] + #[should_panic] + fn character_literals_6() { + _tokenisation_test(r#"'aa'"#, &[Token::String(String::from("aa"))]); + } + + #[test] + fn string_literals_1() { + _tokenisation_test("\"hello\"", &[Token::String(String::from("hello"))]); + } + + #[test] + fn string_literals_2() { + _tokenisation_test(r#""""#, &[Token::String(String::from(""))]); + } + + #[test] + fn string_literals_2a() { + _tokenisation_test( + r#""""""#, + &[ + Token::String(String::from("")), + Token::String(String::from("")), + ], + ); + } + + #[test] + fn string_literals_3() { + _tokenisation_test( + r#""\"" " ""#, + &[ + Token::String(String::from("\"")), + Token::String(String::from(" ")), + ], + ); + } +} + +#[cfg(test)] +mod parser_tests { + use super::super::{ + expressions::Expression, + types::{ + Clause, ExtendedOpcode, LocationSpecifier, Token, VariableTarget, + VariableTypeDefinition, VariableTypeReference, + }, + }; + use crate::backend::{ + bf::TapeCell, + bf2d::{Opcode2D, 
TapeCell2D}, + }; + + #[test] + fn parse_if_1() { + assert!(parse::(&[ + // if true {{}} + Token::If, + Token::True, + Token::OpenBrace, + Token::OpenBrace, + Token::ClosingBrace, + Token::ClosingBrace, + ]) + .unwrap() + .iter() + .eq(&[Clause::IfElse { + condition: Expression::NaturalNumber(1), + if_block: Some(vec![Clause::::Block(vec![])]), + else_block: None, + }])); + } + + #[test] + fn end_tokens_1() { + let _ = parse::(&[Token::Clobbers]).expect_err(""); + } + + #[test] + fn end_tokens_2() { + let _ = parse::(&[Token::Semicolon]).unwrap(); + let _ = parse::(&[Token::Semicolon, Token::Semicolon]).unwrap(); + let _ = + parse::(&[Token::Semicolon, Token::Semicolon, Token::Semicolon]) + .unwrap(); + } + + #[test] + fn end_tokens_3() { + let _ = parse::(&[Token::Cell, Token::Semicolon]).expect_err(""); + } + + #[test] + fn while_condition_1() { + assert!(parse::(&[ + Token::While, + Token::Name(String::from("x")), + Token::OpenBrace, + Token::OpenBrace, + Token::ClosingBrace, + Token::ClosingBrace, + ]) + .unwrap() + .iter() + .eq(&[Clause::WhileLoop { + var: VariableTarget { + name: String::from("x"), + subfields: None, + is_spread: false + }, + block: vec![Clause::Block(vec![])] + }])) + } + + #[test] + fn two_dimensional_1() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("x")), + Token::At, + Token::OpenParenthesis, + Token::Digits(String::from("0")), + Token::Comma, + Token::Digits(String::from("1")), + Token::ClosingParenthesis, + Token::Semicolon, + ]) + .unwrap_err() + .contains("Invalid location specifier")); + } + + #[test] + fn two_dimensional_2() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("x")), + Token::At, + Token::OpenParenthesis, + Token::Digits(String::from("0")), + Token::Comma, + Token::Digits(String::from("1")), + Token::ClosingParenthesis, + Token::Semicolon, + ]) + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("x"), + var_type: 
VariableTypeReference::Cell, + location_specifier: LocationSpecifier::Cell(TapeCell2D(0, 1)) + } + }])); + } + + #[test] + fn two_dimensional_3() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("xyz")), + Token::At, + Token::OpenParenthesis, + Token::Minus, + Token::Digits(String::from("10")), + Token::Comma, + Token::Minus, + Token::Digits(String::from("101")), + Token::ClosingParenthesis, + Token::Semicolon, + ]) + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("xyz"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::Cell(TapeCell2D(-10, -101)) + } + }])); + } + + #[test] + fn var_v() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon + ]) + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }])) + } + + #[test] + fn inline_bf_1() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon, + Token::Bf, + Token::OpenBrace, + Token::Plus, + Token::OpenBrace, + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon, + Token::ClosingBrace, + Token::Minus, + Token::ClosingBrace + ]) + .unwrap() + .iter() + .eq(&[ + Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }, + Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Add), + ExtendedOpcode::Block(vec![Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }]), + ExtendedOpcode::Opcode(Opcode2D::Subtract), + ] + } + ])) + } 
+ + // TODO: make context-based parser for brainfuck and refactor these tests + #[test] + fn inline_bf_2() { + assert!(parse::(&[ + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon, + Token::Bf, + Token::OpenBrace, + Token::Name(String::from("v")), + Token::OpenBrace, + Token::Cell, + Token::Name(String::from("v")), + Token::Semicolon, + Token::ClosingBrace, + Token::Caret, + Token::ClosingBrace + ]) + .unwrap() + .iter() + .eq(&[ + Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }, + Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Block(vec![Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }]), + ExtendedOpcode::Opcode(Opcode2D::Up), + ] + } + ])) + } + + #[test] + fn inline_bf_3() { + assert!(parse::(&[ + Token::Bf, + Token::OpenBrace, + Token::Name(String::from("vvvv")), + Token::MoreThan, + Token::ClosingBrace + ]) + .unwrap() + .iter() + .eq(&[Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Right), + ] + }])) + } +} diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs new file mode 100644 index 0000000..08d8ae1 --- /dev/null +++ b/compiler/src/parser/tokeniser.rs @@ -0,0 +1,136 @@ +// TODO: make an impl for a tokeniser, inverse-builder pattern? 
+// have a function to peek, then accept changes, so we don't double hangle tokens + +#[derive(Debug, Clone, PartialEq)] +pub enum Token { + None, + Output, + Input, + Fn, + Cell, + Struct, + While, + If, + Not, + Else, + OpenBrace, + ClosingBrace, + OpenSquareBracket, + ClosingSquareBracket, + OpenParenthesis, + ClosingParenthesis, + Comma, + Dot, + Asterisk, + At, + Copy, + Drain, + Into, + Bf, + Clobbers, + Assert, + Equals, + Unknown, + Name(String), + Digits(String), + String(String), + Character(char), + True, + False, + Minus, + Plus, + PlusEquals, + MinusEquals, + EqualsSign, + Semicolon, +} + +/// Get the next token from chars, advance the passed in pointer +pub fn next_token(chars: &mut &[char]) -> Result { + // skip any whitespace + skip_whitespace(chars)?; + + // TODO: this is flawed, what about cell g=5;? + let token_len = find_next_whitespace(*chars)?; + + Ok(match token_len { + 0 => return Err(()), + 1 => match chars[0] { + '{' => Token::OpenBrace, + '}' => Token::ClosingBrace, + _ => todo!(), + }, + 2 => match chars[0..2] { + ['b', 'f'] => Token::Bf, + ['i', 'f'] => Token::If, + _ => todo!(), + }, + 3 => match chars[0..3] { + ['n', 'o', 't'] => Token::Not, + _ => todo!(), + }, + 4 => match chars[0..4] { + ['c', 'e', 'l', 'l'] => Token::Cell, + ['e', 'l', 's', 'e'] => Token::Else, + ['t', 'r', 'u', 'e'] => Token::True, + _ => todo!(), + }, + 5 => match chars[0..5] { + ['w', 'h', 'i', 'l', 'e'] => Token::While, + _ => todo!(), + }, + _ => todo!(), + }) +} + +// TODO: figure out errors for these helper functions +pub fn find_next(chars: &[char], character: char) -> Result { + let mut i = 0; + loop { + let Some(c) = chars.get(i) else { + return Err(()); + }; + + if c == character { + break; + } + i += 1; + } + Ok(i) +} + +pub fn find_and_advance<'a>(chars: &'a mut &[char], character: char) -> Result<&'a [char], ()> { + let substr_len = find_next(chars, character)?; + let chars_before = chars[..substr_len]; + chars = chars[substr_len..]; + chars_before 
+} + +pub fn skip_whitespace(chars: &mut &[char]) -> Result<(), ()> { + loop { + let Some(c) = chars.get(0) else { + return Err(()); + }; + + if !c.is_whitespace() { + break; + } + *chars = &chars[1..]; + } + Ok(()) +} + +pub fn find_next_whitespace(chars: &[char]) -> Result { + let mut i = 0; + loop { + let Some(c) = chars.get(i) else { + return Err(()); + }; + + if c.is_whitespace() { + break; + } + i += 1; + } + Ok(i) +} diff --git a/compiler/src/parser/types.rs b/compiler/src/parser/types.rs new file mode 100644 index 0000000..5961e81 --- /dev/null +++ b/compiler/src/parser/types.rs @@ -0,0 +1,287 @@ +use super::expressions::Expression; +use crate::macros::macros::r_panic; + +use std::fmt::Display; + +/// Clause type type variables: +/// - TC: TapeCell can be changed to implement 2D brainfuck, or other modifications +/// - OC: Opcode represents the valid Brainfuck Opcodes that we're generating (also used for 2D or other BF variants) +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub enum Clause { + None, + DeclareVariable { + var: VariableTypeDefinition, + }, + DefineVariable { + var: VariableTypeDefinition, + value: Expression, + }, + DefineStruct { + name: String, + fields: Vec, + }, + AddToVariable { + var: VariableTarget, + value: Expression, + self_referencing: bool, + }, + SetVariable { + var: VariableTarget, + value: Expression, + self_referencing: bool, + }, + AssertVariableValue { + var: VariableTarget, + // Some(constant) indicates we know the value, None indicates we don't know the value + // typically will either be used for assert unknown or assert 0 + value: Option, + }, + CopyLoop { + source: Expression, + targets: Vec, + block: Vec>, + is_draining: bool, + }, + While { + var: VariableTarget, + block: Vec>, + }, + OutputValue { + value: Expression, + }, + InputVariable { + var: VariableTarget, + }, + DefineFunction { + name: String, + // TODO: fix the type here, as function definitions don't actually need location specifiers and 
therefore don't need a tape cell type + arguments: Vec>, + block: Vec>, + }, + CallFunction { + function_name: String, + arguments: Vec, + }, + If { + condition: Expression, + if_block: Vec>, + }, + IfNot { + condition: Expression, + if_not_block: Vec>, + }, + IfElse { + condition: Expression, + if_block: Vec>, + else_block: Vec>, + }, + IfNotElse { + condition: Expression, + if_not_block: Vec>, + else_block: Vec>, + }, + Block(Vec>), + InlineBrainfuck { + location_specifier: LocationSpecifier, + clobbered_variables: Vec, + operations: Vec>, + }, +} + +pub trait TapeCellLocation +where + Self: Sized + Display, +{ + /// optionally parse a memory location specifier + /// let g @(4,2) = 68; + /// or + /// let p @3 = 68; + fn parse_location_specifier(chars: &mut &[char]) -> Result, String>; + + /// safely cast a 2D or 1D location specifier into a 1D non-negative cell offset, + /// for use with struct fields + fn to_positive_cell_offset(&self) -> Result; +} + +// extended brainfuck opcodes to include mastermind code blocks +#[derive(Debug, Clone)] +#[cfg_attr(test, derive(PartialEq))] +pub enum ExtendedOpcode { + Opcode(OC), + Block(Vec>), +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +/// the type of a variable according to the user, not validated yet as the parser does not keep track of types +pub enum VariableTypeReference { + Cell, + Struct(String), + Array(Box, usize), +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub enum LocationSpecifier { + None, + Cell(TC), + Variable(VariableTarget), +} +impl LocationSpecifier { + fn is_none(&self) -> bool { + matches!(self, LocationSpecifier::None) + } +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct VariableTypeDefinition { + pub name: String, + pub var_type: VariableTypeReference, + pub location_specifier: LocationSpecifier, + // Infinite {name: String, pattern: ???}, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct StructFieldTypeDefinition { + pub name: String, + pub field_type: 
VariableTypeReference, + pub location_offset_specifier: Option, +} +// let non_neg_location_specifier = match &var_def.location_specifier { +// LocationSpecifier::None => None, +// LocationSpecifier::Cell(l) => { +// // assert the y coordinate is 0 +// // r_assert!( +// // l.1 == 0, +// // "Struct field location specifiers do not support 2D grid cells: {var_def}" +// // ); +// r_assert!( +// l.0 >= 0, +// "Struct field location specifiers must be non-negative: {var_def}" +// ); +// Some(l.0 as usize) +// } +// LocationSpecifier::Variable(_) => { +// r_panic!( "Location specifiers in struct definitions must be relative, not variables: {var_def}") +// } +// }; +impl TryInto for VariableTypeDefinition +where + TC: TapeCellLocation, +{ + type Error = String; + + fn try_into(self) -> Result { + let location_offset_specifier = match &self.location_specifier { + LocationSpecifier::None => None, + LocationSpecifier::Cell(cell) => Some(match cell.to_positive_cell_offset() { + Ok(offset) => offset, + Err(err) => r_panic!("Cannot create struct field \"{self}\". {err}"), + }), + LocationSpecifier::Variable(_) => r_panic!( + "Location specifiers in struct definitions \ +must be relative, not variable." + ), + }; + Ok(StructFieldTypeDefinition { + name: self.name, + field_type: self.var_type, + location_offset_specifier, + }) + } +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub enum Reference { + NamedField(String), + Index(usize), +} + +/// Represents a list of subfield references after the `.` or `[x]` operators, e.g. 
`obj.h[6]` would have `['h', '[6]']` +// a bit verbose, not quite sure about this +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct VariableTargetReferenceChain(pub Vec); +/// Represents a target variable in an expression, this has no type informatino +#[derive(Debug, Clone, Hash, PartialEq, Eq)] +pub struct VariableTarget { + pub name: String, + pub subfields: Option, + pub is_spread: bool, +} +impl VariableTarget { + /// convert a definition to a target for use with definition clauses (as opposed to declarations) + pub fn from_definition(var_def: &VariableTypeDefinition) -> Self { + VariableTarget { + name: var_def.name.clone(), + subfields: None, + is_spread: false, + } + } +} + +impl Display for VariableTypeReference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self { + VariableTypeReference::Cell => f.write_str("cell"), + VariableTypeReference::Struct(struct_name) => { + f.write_fmt(format_args!("struct {struct_name}")) + } + VariableTypeReference::Array(element_type, len) => { + f.write_fmt(format_args!("{element_type}[{len}]")) + } + } + } +} + +impl Display for VariableTypeDefinition { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&format!("{} {}", self.var_type, self.name))?; + match &self.location_specifier { + LocationSpecifier::Cell(_) | LocationSpecifier::Variable(_) => { + f.write_str(&format!(" {}", self.location_specifier))? 
+ } + LocationSpecifier::None => (), + } + + Ok(()) + } +} + +impl Display for LocationSpecifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("@")?; + match self { + LocationSpecifier::Cell(cell) => f.write_str(&format!("{cell}"))?, + LocationSpecifier::Variable(var) => f.write_str(&format!("{var}"))?, + LocationSpecifier::None => (), + } + + Ok(()) + } +} + +impl Display for Reference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Reference::NamedField(subfield_name) => f.write_str(&format!(".{subfield_name}"))?, + Reference::Index(index) => f.write_str(&format!("[{index}]"))?, + } + + Ok(()) + } +} + +impl Display for VariableTarget { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.is_spread { + f.write_str("*")?; + } + f.write_str(&self.name)?; + if let Some(subfield_refs) = &self.subfields { + for ref_step in subfield_refs.0.iter() { + f.write_str(&format!("{ref_step}"))?; + } + } + + Ok(()) + } +} From cf1a6167d88240600a865d60339d1fea6df5362f Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Fri, 7 Nov 2025 19:49:58 +1100 Subject: [PATCH 25/56] WIP: fix errors relating to partially completed parser --- compiler/src/backend/bf.rs | 2 +- compiler/src/backend/bf2d.rs | 2 +- compiler/src/backend/common.rs | 2 +- compiler/src/frontend.rs | 64 +++- compiler/src/lib.rs | 6 +- compiler/src/parser/expressions.rs | 14 +- compiler/src/parser/mod.rs | 9 +- compiler/src/parser/parser.rs | 153 +++++----- compiler/src/parser/tests.rs | 453 ++++++++++------------------- compiler/src/parser/tokeniser.rs | 110 ++++++- compiler/src/tests.rs | 45 ++- 11 files changed, 450 insertions(+), 410 deletions(-) diff --git a/compiler/src/backend/bf.rs b/compiler/src/backend/bf.rs index c1d8ff4..99d0fa7 100644 --- a/compiler/src/backend/bf.rs +++ b/compiler/src/backend/bf.rs @@ -4,7 +4,7 @@ use super::common::{ }; use crate::{ macros::macros::{r_assert, r_panic}, - 
parser::types::Token, + parser::tokeniser::Token, }; pub type TapeCell = i32; diff --git a/compiler/src/backend/bf2d.rs b/compiler/src/backend/bf2d.rs index 1b1ecae..a5a12a8 100644 --- a/compiler/src/backend/bf2d.rs +++ b/compiler/src/backend/bf2d.rs @@ -4,7 +4,7 @@ use super::common::{ }; use crate::{ macros::macros::{r_assert, r_panic}, - parser::types::Token, + parser::tokeniser::Token, }; use std::{fmt::Display, hash::Hash}; diff --git a/compiler/src/backend/common.rs b/compiler/src/backend/common.rs index bc9e744..cb62b18 100644 --- a/compiler/src/backend/common.rs +++ b/compiler/src/backend/common.rs @@ -3,7 +3,7 @@ use crate::{ frontend::{CellLocation, Instruction, MemoryId}, macros::macros::{r_assert, r_panic}, misc::{MastermindConfig, MastermindContext}, - parser::types::{TapeCellLocation, Token}, + parser::{tokeniser::Token, types::TapeCellLocation}, }; use std::{ diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index c591578..3799136 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -12,8 +12,8 @@ use crate::{ parser::{ expressions::Expression, types::{ - Clause, ExtendedOpcode, LocationSpecifier, Reference, StructFieldDefinition, - VariableDefinition, VariableTarget, VariableTargetReferenceChain, + Clause, ExtendedOpcode, LocationSpecifier, Reference, StructFieldTypeDefinition, + VariableTarget, VariableTargetReferenceChain, VariableTypeDefinition, VariableTypeReference, }, }, @@ -45,6 +45,8 @@ impl MastermindContext { // convert fields with 2D or 1D location specifiers to valid struct location specifiers scope.register_struct_definition(name, fields.clone())?; } + // also filter out None clauses (although there shouldn't be any) + Clause::None => (), _ => filtered_clauses_1.push(clause.clone()), } } @@ -315,7 +317,7 @@ impl MastermindContext { } } } - Clause::WhileLoop { var, block } => { + Clause::While { var, block } => { let cell = scope.get_cell(&var)?; // open loop on variable @@ -404,11 +406,48 @@ impl 
MastermindContext { scope.push_instruction(Instruction::Free(source_cell.memory_id)); } } - Clause::IfElse { - condition, - if_block, - else_block, - } => { + clause @ (Clause::If { + condition: _, + if_block: _, + } + | Clause::IfNot { + condition: _, + if_not_block: _, + } + | Clause::IfElse { + condition: _, + if_block: _, + else_block: _, + } + | Clause::IfNotElse { + condition: _, + if_not_block: _, + else_block: _, + }) => { + // If-else clause types changed recently, so here is a patch to keep the original frontend code: + let (condition, if_block, else_block) = match clause { + Clause::If { + condition, + if_block, + } => (condition, Some(if_block), None), + Clause::IfNot { + condition, + if_not_block, + } => (condition, None, Some(if_not_block)), + Clause::IfElse { + condition, + if_block, + else_block, + } => (condition, Some(if_block), Some(else_block)), + Clause::IfNotElse { + condition, + if_not_block, + else_block, + } => (condition, Some(else_block), Some(if_not_block)), + _ => unreachable!(), + }; + // end patch // + if if_block.is_none() && else_block.is_none() { panic!("Expected block in if/else statement"); }; @@ -598,7 +637,8 @@ impl MastermindContext { name: _, arguments: _, block: _, - } => unreachable!(), + } + | Clause::None => unreachable!(), } } @@ -941,7 +981,7 @@ where } /// Get the correct variable type and allocate the right amount of cells for it - fn allocate_variable(&mut self, var: VariableDefinition) -> Result<&ValueType, String> { + fn allocate_variable(&mut self, var: VariableTypeDefinition) -> Result<&ValueType, String> { r_assert!( !self.variable_memory.contains_key(&var.name), "Cannot allocate variable {var} twice in the same scope" @@ -1042,7 +1082,7 @@ where fn register_struct_definition( &mut self, struct_name: &str, - fields: Vec, + fields: Vec, ) -> Result<(), String> { let mut absolute_fields = vec![]; @@ -1069,7 +1109,7 @@ where fn register_function_definition( &mut self, new_function_name: &str, - new_arguments: Vec>, 
+ new_arguments: Vec>, new_block: Vec>, ) -> Result<(), String> { let absolute_arguments = new_arguments diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index c3365e5..11ea637 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -19,6 +19,7 @@ use crate::{ }, brainfuck::{BrainfuckConfig, BrainfuckContext}, misc::MastermindContext, + parser::parser::parse_program, preprocessor::preprocess_from_memory, }; @@ -50,14 +51,13 @@ pub fn wasm_compile( }; let preprocessed_file = preprocess_from_memory(&file_contents, entry_file_name)?; - let tokens = tokenise(&preprocessed_file)?; if ctx.config.enable_2d_grid { - let parsed_syntax = parse::(&tokens)?; + let parsed_syntax = parse_program::(&preprocessed_file)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; Ok(bf_code.to_string()) } else { - let parsed_syntax = parse::(&tokens)?; + let parsed_syntax = parse_program::(&preprocessed_file)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; Ok(bf_code.to_string()) diff --git a/compiler/src/parser/expressions.rs b/compiler/src/parser/expressions.rs index 39984f3..34c580b 100644 --- a/compiler/src/parser/expressions.rs +++ b/compiler/src/parser/expressions.rs @@ -1,3 +1,7 @@ +use std::num::Wrapping; + +use crate::macros::macros::r_panic; + use super::types::VariableTarget; // TODO: add multiplication @@ -33,7 +37,7 @@ impl Sign { impl Expression { // Iterators? 
// TODO: support post/pre increment in expressions - pub fn parse(tokens: &[Token]) -> Result { + pub fn parse(chars: &mut &[char]) -> Result { let mut i = 0usize; if let Token::String(s) = &tokens[i] { @@ -176,9 +180,8 @@ impl Expression { }); current_sign = None; } - token => { - r_panic!("Unexpected token {token:#?} found in expression: {tokens:#?}"); - } + // TODO: add source snippet + token => r_panic!("Unexpected token {token:#?} found in expression."), } } @@ -188,7 +191,8 @@ impl Expression { sign: Sign::Positive, summands, }, - _ => r_panic!("Expected value in expression: {tokens:#?}"), + // TODO: add source snippet + _ => r_panic!("Expected value in expression."), }) } diff --git a/compiler/src/parser/mod.rs b/compiler/src/parser/mod.rs index 71f2aaf..db36f12 100644 --- a/compiler/src/parser/mod.rs +++ b/compiler/src/parser/mod.rs @@ -1,5 +1,6 @@ -mod expressions; -mod parser; -mod tests; -mod tokeniser; +pub mod expressions; +pub mod parser; +pub mod tokeniser; pub mod types; + +mod tests; diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 41567a0..28262cc 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -18,32 +18,44 @@ use crate::{ parser::types::VariableTypeDefinition, }; -pub fn parse_clause( +pub fn parse_program( + raw: &str, +) -> Result>, String> { + let program_chars: Vec = raw.chars().collect(); + let mut chars_slice = &program_chars[..]; + let mut clauses = vec![]; + loop { + let clause = parse_clause(&mut chars_slice)?; + if let Clause::None = clause { + break; + } + clauses.push(clause); + } + + Ok(clauses) +} + +fn parse_clause( chars: &mut &[char], ) -> Result, String> { - // TODO: refactor this? inconsistent function calling in different parsing functions - let next_token = - |s| next_token(s).map_err(|()| format!("Invalid token: {}", chars.iter().collect())); - - let mut s = chars; - Ok(match next_token(s)? 
{ - // None signifies end of input - None => None, - Token::If => parse_if_else_clause(chars)?, - Token::While => parse_while_clause(chars)?, - Token::Fn => parse_function_definition_clause(chars)?, - Token::Struct => { - let Token::Name(_) = next_token(s)? else { + let mut s = *chars; + Ok(match next_token(&mut s) { + Ok(Token::None) => Clause::None, + Ok(Token::If) => parse_if_else_clause(chars)?, + Ok(Token::While) => parse_while_clause(chars)?, + Ok(Token::Fn) => parse_function_definition_clause(chars)?, + Ok(Token::Struct) => { + let Ok(Token::Name(_)) = next_token(&mut s) else { // TODO: add source snippet r_panic!("Expected identifier after `struct` keyword."); }; - match next_token(s)? { - Token::OpenBrace => parse_struct_definition_clause(chars)?, + match next_token(&mut s) { + Ok(Token::OpenBrace) => parse_struct_definition_clause(chars)?, _ => parse_let_clause(chars)?, } } - Token::Cell => parse_let_clause(chars)?, - _ => todo!(), + Ok(Token::Cell) => parse_let_clause(chars)?, + Err(()) | Ok(_) => r_panic!("Invalid starting token."), }) } @@ -59,18 +71,20 @@ impl TapeCellLocation for TapeCell { fn parse_location_specifier( chars: &mut &[char], ) -> Result, String> { - let mut s = chars; - let Token::At = next_token(s)? else { + let mut s = *chars; + let Ok(Token::At) = next_token(&mut s) else { return Ok(LocationSpecifier::None); }; *chars = s; - match next_token(s)? 
{ - Token::Minus | Token::Digits(_) => Ok(LocationSpecifier::Cell(parse_integer(chars)?)), + match next_token(&mut s) { + Ok(Token::Minus | Token::Digits(_)) => { + Ok(LocationSpecifier::Cell(parse_integer(chars)?)) + } // variable location specifier: - Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), + Ok(Token::Name(_)) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), // TODO: add source snippet - _ => r_panic!("Invalid location specifier: ",), + _ => r_panic!("Invalid location specifier.",), } } @@ -84,26 +98,26 @@ impl TapeCellLocation for TapeCell2D { fn parse_location_specifier( chars: &mut &[char], ) -> Result, String> { - let mut s = chars; - let Token::At = next_token(s)? else { + let mut s = *chars; + let Ok(Token::At) = next_token(&mut s) else { return Ok(LocationSpecifier::None); }; *chars = s; - match next_token(s)? { - Token::OpenParenthesis => { + match next_token(&mut s) { + Ok(Token::OpenParenthesis) => { // parse a 2-tuple let tuple = parse_integer_tuple::<2>(chars)?; Ok(LocationSpecifier::Cell(TapeCell2D(tuple[0], tuple[1]))) } - Token::Minus | Token::Digits(_) => Ok(LocationSpecifier::Cell(TapeCell2D( + Ok(Token::Minus | Token::Digits(_)) => Ok(LocationSpecifier::Cell(TapeCell2D( parse_integer(chars)?, 0, ))), // variable location specifier: - Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), + Ok(Token::Name(_)) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), // TODO: add source snippet - _ => r_panic!("Invalid location specifier: ",), + _ => r_panic!("Invalid location specifier."), } } @@ -119,10 +133,10 @@ impl TapeCellLocation for TapeCell2D { fn parse_var_type_definition( chars: &mut &[char], ) -> Result, String> { - let mut var_type = match next_token(chars)? { - Token::Cell => VariableTypeReference::Cell, - Token::Struct => { - let Token::Name(struct_name) = next_token(chars)? 
else { + let mut var_type = match next_token(chars) { + Ok(Token::Cell) => VariableTypeReference::Cell, + Ok(Token::Struct) => { + let Ok(Token::Name(struct_name)) = next_token(chars) else { // TODO: add source snippet r_panic!("Expected struct type name in variable definition."); }; @@ -137,14 +151,14 @@ fn parse_var_type_definition( // parse array specifiers { - let mut s = chars; - while let Token::OpenSquareBracket = next_token(s)? { + let mut s = *chars; + while let Ok(Token::OpenSquareBracket) = next_token(&mut s) { var_type = VariableTypeReference::Array(Box::new(var_type), parse_array_length(chars)?); s = chars; } } - let Token::Name(name) = next_token(chars)? else { + let Ok(Token::Name(name)) = next_token(chars) else { // TODO: add source snippet r_panic!("Expected name in variable definition."); }; @@ -159,22 +173,22 @@ fn parse_var_type_definition( /// parse the subscript of an array variable, e.g. [4] [6] [0] /// must be compile-time constant fn parse_subscript(chars: &mut &[char]) -> Result { - let Token::OpenSquareBracket = next_token(chars)? else { + let Ok(Token::OpenSquareBracket) = next_token(chars) else { // TODO: add program snippet r_panic!("Expected `[` in array subscript."); }; - let Token::Digits(digits) = next_token(chars)? else { + let Ok(Token::Digits(digits)) = next_token(chars) else { // TODO: add program snippet r_panic!("Expected natural number in array subscript."); }; - let Token::ClosingSquareBracket = next_token(chars)? 
else { + let Ok(Token::ClosingSquareBracket) = next_token(chars) else { // TODO: add program snippet r_panic!("Expected `]` in array subscript."); }; // TODO: fix duplicate error here digits .parse::() - .map_err(|_| Err(format!("Expected natural number in array subscript."))) + .map_err(|_| format!("Expected natural number in array subscript.")) } /// parse_array_subscript but with a length check @@ -187,8 +201,8 @@ fn parse_array_length(chars: &mut &[char]) -> Result { fn parse_var_target(chars: &mut &[char]) -> Result { let is_spread = { - let s = chars; - if let Token::Asterisk = next_token(s)? { + let mut s = *chars; + if let Ok(Token::Asterisk) = next_token(&mut s) { *chars = s; true } else { @@ -196,26 +210,28 @@ fn parse_var_target(chars: &mut &[char]) -> Result { } }; - let Token::Name(base_var_name) = next_token(chars)? else { + let Ok(Token::Name(base_var_name)) = next_token(chars) else { // TODO: add source snippet r_panic!("Expected identifier in variable target identifier."); }; let mut ref_chain = vec![]; - let mut s = chars; + let mut s = *chars; loop { - match next_token(s)? { - Token::OpenSquareBracket => { + match next_token(&mut s) { + Ok(Token::OpenSquareBracket) => { let index = parse_subscript(chars)?; ref_chain.push(Reference::Index(index)); } - Token::Dot => { - let Token::Name(subfield_name) = next_token(s)? 
else { + Ok(Token::Dot) => { + let Ok(Token::Name(subfield_name)) = next_token(&mut s) else { // TODO: add source snippet r_panic!("Expected subfield name in variable target identifier."); }; ref_chain.push(Reference::NamedField(subfield_name)); } + // TODO: add source snippet + Err(_) => r_panic!("Unexpected token found in variable target."), _ => { break; } @@ -239,7 +255,7 @@ fn parse_integer(chars: &mut &[char]) -> Result { let mut is_negative = false; if let Ok(Token::Minus) = token { is_negative = true; - token = next_token(chars)?; + token = next_token(chars); } let Ok(Token::Digits(digits)) = token else { // TODO: add source snippet @@ -253,7 +269,7 @@ fn parse_integer(chars: &mut &[char]) -> Result { true => -(magnitude as i32), }) // TODO: fix duplicate error here - .map_err(|_| Err(format!("Expected integer."))) + .map_err(|_| format!("Expected integer.")) } fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; LENGTH], String> { @@ -295,7 +311,7 @@ fn parse_if_else_clause( let is_not = { let s = chars; - if let Token::Not = next_token(s)? { + if let Ok(Token::Not) = next_token(s) { *chars = s; true } else { @@ -314,7 +330,7 @@ fn parse_if_else_clause( let block_two = { let mut s = chars; - if let Token::Else = next_token(s)? { + if let Ok(Token::Else) = next_token(s) { *chars = s; Some(parse_block(chars)?) } else { @@ -347,9 +363,9 @@ fn parse_if_else_clause( fn parse_while_clause( chars: &mut &[char], ) -> Result, String> { - let Token::While = next_token(chars)? else { - // TODO: add program snippet - r_panic!("Expected \"while\" in while clause."); + let Ok(Token::While) = next_token(chars) else { + // TODO: add source snippet + r_panic!("Expected `while` in while clause."); }; let Ok(condition_char_len) = find_next(chars, '{') else { @@ -375,37 +391,36 @@ fn parse_while_clause( fn parse_function_definition_clause( chars: &mut &[char], ) -> Result, String> { - let Token::Fn = next_token(chars)? 
else { + let Ok(Token::Fn) = next_token(chars) else { // TODO: add source snippet r_panic!("Expected `fn` in function definition clause."); }; - let Token::Name(function_name) = next_token(chars)? else { + let Ok(Token::Name(function_name)) = next_token(chars) else { // TODO: add source snippet r_panic!("Expected name in function definition clause."); }; - let Token::OpenParenthesis = next_token(chars)? else { + let Ok(Token::OpenParenthesis) = next_token(chars) else { // TODO: add source snippet r_panic!("Expected argument list in function definition clause."); }; let mut arguments = vec![]; loop { { - let mut s = chars; - if let Token::ClosingParenthesis = next_token(s)? { + let mut s = *chars; + if let Ok(Token::ClosingParenthesis) = next_token(&mut s) { *chars = s; break; } } arguments.push(parse_var_type_definition(chars)?); - let token = next_token(chars)?; - match token { - Token::ClosingParenthesis => break, - Token::Comma => (), - Some(token) => r_panic!("Unexpected token in function argument list: `{token}`."), - None => r_panic!("Expected token in function argument list."), + match next_token(chars) { + Ok(Token::ClosingParenthesis) => break, + Ok(Token::Comma) => (), + // TODO: add source snippet + _ => r_panic!("Unexpected token in function argument list."), } } @@ -437,7 +452,7 @@ fn parse_struct_definition_clause( let mut fields = vec![]; loop { - let field = parse_var_type_definition(chars)?; + let field = parse_var_type_definition::(chars)?; fields.push(field.try_into()?); let Ok(Token::Semicolon) = next_token(chars) else { // TODO: add source snippet diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs index 447e5c6..ad3c603 100644 --- a/compiler/src/parser/tests.rs +++ b/compiler/src/parser/tests.rs @@ -1,370 +1,223 @@ -#[cfg(test)] -mod tokeniser_tests { - use super::super::{parser::next_token, types::Token}; - - fn tokenise(input_str: &str) -> Result, String> { - let mut tokens = vec![]; - let chars = 
input_str.chars().collect::>(); - let mut c = &chars; - while let Some(token) = next_token(&c)? { - tokens.push(token); - } - Ok(tokens) - } - - fn _tokenisation_test(input_str: &str, desired_output: &[Token]) { - let input_string = String::from(input_str); - let actual_output = tokenise(&input_string).unwrap(); - println!("desired: {desired_output:#?}"); - println!("actual: {actual_output:#?}"); - assert!(actual_output.iter().eq(desired_output)); - } - - #[test] - fn character_literals_1() { - _tokenisation_test( - r#"'a' 'b' 'c' ' '"#, - &[ - Token::Character('a'), - Token::Character('b'), - Token::Character('c'), - Token::Character(' '), - ], - ); - } - - #[test] - fn character_literals_2() { - _tokenisation_test(r#"'\n'"#, &[Token::Character('\n')]); - } - - #[test] - fn character_literals_3() { - _tokenisation_test(r#"'"'"#, &[Token::Character('"')]); - } - - #[test] - fn character_literals_4() { - _tokenisation_test(r#"'\''"#, &[Token::Character('\'')]); - } - - #[test] - #[should_panic] - fn character_literals_5() { - _tokenisation_test(r#"'\'"#, &[Token::Character('\\')]); - } - - #[test] - #[should_panic] - fn character_literals_6() { - _tokenisation_test(r#"'aa'"#, &[Token::String(String::from("aa"))]); - } - - #[test] - fn string_literals_1() { - _tokenisation_test("\"hello\"", &[Token::String(String::from("hello"))]); - } - - #[test] - fn string_literals_2() { - _tokenisation_test(r#""""#, &[Token::String(String::from(""))]); - } - - #[test] - fn string_literals_2a() { - _tokenisation_test( - r#""""""#, - &[ - Token::String(String::from("")), - Token::String(String::from("")), - ], - ); - } - - #[test] - fn string_literals_3() { - _tokenisation_test( - r#""\"" " ""#, - &[ - Token::String(String::from("\"")), - Token::String(String::from(" ")), - ], - ); - } -} - #[cfg(test)] mod parser_tests { use super::super::{ expressions::Expression, + parser::parse_program, + tokeniser::Token, types::{ - Clause, ExtendedOpcode, LocationSpecifier, Token, 
VariableTarget, - VariableTypeDefinition, VariableTypeReference, + Clause, ExtendedOpcode, LocationSpecifier, VariableTarget, VariableTypeDefinition, + VariableTypeReference, }, }; - use crate::backend::{ - bf::TapeCell, - bf2d::{Opcode2D, TapeCell2D}, + use crate::{ + backend::{ + bf::{Opcode, TapeCell}, + bf2d::{Opcode2D, TapeCell2D}, + }, + macros::macros::r_assert, }; #[test] fn parse_if_1() { - assert!(parse::(&[ - // if true {{}} - Token::If, - Token::True, - Token::OpenBrace, - Token::OpenBrace, - Token::ClosingBrace, - Token::ClosingBrace, - ]) - .unwrap() - .iter() - .eq(&[Clause::IfElse { - condition: Expression::NaturalNumber(1), - if_block: Some(vec![Clause::::Block(vec![])]), - else_block: None, - }])); + assert!(parse_program::("if true {{}}") + .unwrap() + .iter() + .eq(&[Clause::If { + condition: Expression::NaturalNumber(1), + if_block: vec![Clause::::Block(vec![])], + }])); } #[test] fn end_tokens_1() { - let _ = parse::(&[Token::Clobbers]).expect_err(""); + assert_eq!( + parse_program::("clobbers").unwrap_err(), + "" + ); } #[test] fn end_tokens_2() { - let _ = parse::(&[Token::Semicolon]).unwrap(); - let _ = parse::(&[Token::Semicolon, Token::Semicolon]).unwrap(); - let _ = - parse::(&[Token::Semicolon, Token::Semicolon, Token::Semicolon]) - .unwrap(); + assert_eq!(parse_program::(";").unwrap_err(), ""); + assert_eq!(parse_program::(";;").unwrap_err(), ""); + assert_eq!(parse_program::(";;;").unwrap_err(), ""); } #[test] fn end_tokens_3() { - let _ = parse::(&[Token::Cell, Token::Semicolon]).expect_err(""); + assert_eq!(parse_program::("cell;").unwrap_err(), "") } #[test] fn while_condition_1() { - assert!(parse::(&[ - Token::While, - Token::Name(String::from("x")), - Token::OpenBrace, - Token::OpenBrace, - Token::ClosingBrace, - Token::ClosingBrace, - ]) - .unwrap() - .iter() - .eq(&[Clause::WhileLoop { - var: VariableTarget { - name: String::from("x"), - subfields: None, - is_spread: false - }, - block: vec![Clause::Block(vec![])] - }])) + 
assert!(parse_program::("while x {{}}") + .unwrap() + .iter() + .eq(&[Clause::While { + var: VariableTarget { + name: String::from("x"), + subfields: None, + is_spread: false + }, + block: vec![Clause::Block(vec![])] + }])) } #[test] fn two_dimensional_1() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("x")), - Token::At, - Token::OpenParenthesis, - Token::Digits(String::from("0")), - Token::Comma, - Token::Digits(String::from("1")), - Token::ClosingParenthesis, - Token::Semicolon, - ]) - .unwrap_err() - .contains("Invalid location specifier")); + assert_eq!( + parse_program::("cell x @(0, 1);").unwrap_err(), + "Invalid location specifier @(0, 1)" + ); } #[test] fn two_dimensional_2() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("x")), - Token::At, - Token::OpenParenthesis, - Token::Digits(String::from("0")), - Token::Comma, - Token::Digits(String::from("1")), - Token::ClosingParenthesis, - Token::Semicolon, - ]) - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { - var: VariableTypeDefinition { - name: String::from("x"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::Cell(TapeCell2D(0, 1)) - } - }])); + assert!(parse_program::("cell x @(0, 1);") + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("x"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::Cell(TapeCell2D(0, 1)) + } + }])); } #[test] fn two_dimensional_3() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("xyz")), - Token::At, - Token::OpenParenthesis, - Token::Minus, - Token::Digits(String::from("10")), - Token::Comma, - Token::Minus, - Token::Digits(String::from("101")), - Token::ClosingParenthesis, - Token::Semicolon, - ]) - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { - var: VariableTypeDefinition { - name: String::from("xyz"), - var_type: VariableTypeReference::Cell, - location_specifier: 
LocationSpecifier::Cell(TapeCell2D(-10, -101)) - } - }])); + assert!( + parse_program::("cell xyz @(-10, -101);") + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("xyz"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::Cell(TapeCell2D(-10, -101)) + } + }]) + ); } #[test] - fn var_v() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("v")), - Token::Semicolon - ]) - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { - var: VariableTypeDefinition { - name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }])) + fn var_v_1d() { + assert!(parse_program::("cell v;") + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }])) } #[test] - fn inline_bf_1() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("v")), - Token::Semicolon, - Token::Bf, - Token::OpenBrace, - Token::Plus, - Token::OpenBrace, - Token::Cell, - Token::Name(String::from("v")), - Token::Semicolon, - Token::ClosingBrace, - Token::Minus, - Token::ClosingBrace - ]) - .unwrap() - .iter() - .eq(&[ - Clause::DeclareVariable { + fn var_v_2d() { + assert!(parse_program::("cell v;") + .unwrap() + .iter() + .eq(&[Clause::DeclareVariable { var: VariableTypeDefinition { name: String::from("v"), var_type: VariableTypeReference::Cell, location_specifier: LocationSpecifier::None } - }, - Clause::InlineBrainfuck { - location_specifier: LocationSpecifier::None, - clobbered_variables: vec![], - operations: vec![ - ExtendedOpcode::Opcode(Opcode2D::Add), - ExtendedOpcode::Block(vec![Clause::DeclareVariable { + }])) + } + + #[test] + fn inline_bf_1() { + assert!( + parse_program::("cell v; bf {+{cell v;}-}") + .unwrap() + .iter() + .eq(&[ + Clause::DeclareVariable { var: 
VariableTypeDefinition { name: String::from("v"), var_type: VariableTypeReference::Cell, location_specifier: LocationSpecifier::None } - }]), - ExtendedOpcode::Opcode(Opcode2D::Subtract), - ] - } - ])) + }, + Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode::Add), + ExtendedOpcode::Block(vec![Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }]), + ExtendedOpcode::Opcode(Opcode::Subtract), + ] + } + ]) + ) } - // TODO: make context-based parser for brainfuck and refactor these tests #[test] fn inline_bf_2() { - assert!(parse::(&[ - Token::Cell, - Token::Name(String::from("v")), - Token::Semicolon, - Token::Bf, - Token::OpenBrace, - Token::Name(String::from("v")), - Token::OpenBrace, - Token::Cell, - Token::Name(String::from("v")), - Token::Semicolon, - Token::ClosingBrace, - Token::Caret, - Token::ClosingBrace - ]) - .unwrap() - .iter() - .eq(&[ - Clause::DeclareVariable { - var: VariableTypeDefinition { - name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }, - Clause::InlineBrainfuck { - location_specifier: LocationSpecifier::None, - clobbered_variables: vec![], - operations: vec![ - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Block(vec![Clause::DeclareVariable { + assert!( + parse_program::("cell v; bf {v{cell v;}^}") + .unwrap() + .iter() + .eq(&[ + Clause::DeclareVariable { var: VariableTypeDefinition { name: String::from("v"), var_type: VariableTypeReference::Cell, location_specifier: LocationSpecifier::None } - }]), - ExtendedOpcode::Opcode(Opcode2D::Up), - ] - } - ])) + }, + Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Down), + 
ExtendedOpcode::Block(vec![Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None + } + }]), + ExtendedOpcode::Opcode(Opcode2D::Up), + ] + } + ]) + ) } #[test] fn inline_bf_3() { - assert!(parse::(&[ - Token::Bf, - Token::OpenBrace, - Token::Name(String::from("vvvv")), - Token::MoreThan, - Token::ClosingBrace - ]) - .unwrap() - .iter() - .eq(&[Clause::InlineBrainfuck { - location_specifier: LocationSpecifier::None, - clobbered_variables: vec![], - operations: vec![ - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Opcode(Opcode2D::Right), - ] - }])) + assert!(parse_program::("bf {vvvv>}") + .unwrap() + .iter() + .eq(&[Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Opcode(Opcode2D::Right), + ] + }])) + } + + #[test] + fn inline_bf_4() { + assert_eq!( + parse_program::("bf {vvvv>}").unwrap_err(), + "" + ); } } diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index 08d8ae1..dea39eb 100644 --- a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -91,7 +91,7 @@ pub fn find_next(chars: &[char], character: char) -> Result { return Err(()); }; - if c == character { + if *c == character { break; } i += 1; @@ -101,9 +101,9 @@ pub fn find_next(chars: &[char], character: char) -> Result { pub fn find_and_advance<'a>(chars: &'a mut &[char], character: char) -> Result<&'a [char], ()> { let substr_len = find_next(chars, character)?; - let chars_before = chars[..substr_len]; - chars = chars[substr_len..]; - chars_before 
+ let chars_before = &chars[..substr_len]; + *chars = &chars[substr_len..]; + Ok(chars_before) } pub fn skip_whitespace(chars: &mut &[char]) -> Result<(), ()> { @@ -134,3 +134,105 @@ pub fn find_next_whitespace(chars: &[char]) -> Result { } Ok(i) } + +#[cfg(test)] +mod tokeniser_tests { + use crate::macros::macros::r_panic; + + use super::*; + + fn tokenise(input_str: &str) -> Result, String> { + let chars_vec: Vec = input_str.chars().collect(); + let mut chars_slice = &chars_vec[..]; + let mut tokens = vec![]; + loop { + let Ok(token) = next_token(&mut chars_slice) else { + r_panic!("Invlid token in input."); + }; + if let Token::None = token { + break; + } + tokens.push(token); + } + Ok(tokens) + } + + fn _tokenisation_test(input_str: &str, desired_output: &[Token]) { + let actual_output = tokenise(input_str).unwrap(); + println!("desired: {desired_output:#?}"); + println!("actual: {actual_output:#?}"); + assert!(actual_output.iter().eq(desired_output)); + } + + #[test] + fn character_literals_1() { + _tokenisation_test( + r#"'a' 'b' 'c' ' '"#, + &[ + Token::Character('a'), + Token::Character('b'), + Token::Character('c'), + Token::Character(' '), + ], + ); + } + + #[test] + fn character_literals_2() { + _tokenisation_test(r#"'\n'"#, &[Token::Character('\n')]); + } + + #[test] + fn character_literals_3() { + _tokenisation_test(r#"'"'"#, &[Token::Character('"')]); + } + + #[test] + fn character_literals_4() { + _tokenisation_test(r#"'\''"#, &[Token::Character('\'')]); + } + + #[test] + #[should_panic] + fn character_literals_5() { + _tokenisation_test(r#"'\'"#, &[Token::Character('\\')]); + } + + #[test] + #[should_panic] + fn character_literals_6() { + _tokenisation_test(r#"'aa'"#, &[Token::String(String::from("aa"))]); + } + + #[test] + fn string_literals_1() { + _tokenisation_test("\"hello\"", &[Token::String(String::from("hello"))]); + } + + #[test] + fn string_literals_2() { + _tokenisation_test(r#""""#, &[Token::String(String::from(""))]); + } + + #[test] + 
fn string_literals_2a() { + _tokenisation_test( + r#""""""#, + &[ + Token::String(String::from("")), + Token::String(String::from("")), + ], + ); + } + + #[test] + fn string_literals_3() { + _tokenisation_test( + r#""\"" " ""#, + &[ + Token::String(String::from("\"")), + Token::String(String::from(" ")), + ], + ); + } +} diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index 61839c6..170ac7a 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -14,8 +14,7 @@ pub mod black_box_tests { }, brainfuck::{bvm_tests::run_code, BrainfuckConfig}, misc::{MastermindConfig, MastermindContext}, - parser::parse, - tokeniser::{tokenise, Token}, + parser::parser::parse_program, }; // TODO: run test suite with different optimisations turned on const OPT_NONE: MastermindConfig = MastermindConfig { @@ -105,8 +104,7 @@ pub mod black_box_tests { Vec: BrainfuckProgram, { let ctx = MastermindContext { config: OPT_NONE }; - let tokens: Vec = tokenise(program)?; - let clauses = parse::(&tokens)?; + let clauses = parse_program::(program)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_program = ctx.ir_to_bf(instructions, None)?; let bfs = bf_program.to_string(); @@ -127,8 +125,7 @@ pub mod black_box_tests { let ctx = MastermindContext { config: config.unwrap_or(OPT_NONE), }; - let tokens: Vec = tokenise(program)?; - let clauses = parse::(&tokens)?; + let clauses = parse_program::(program)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; @@ -142,21 +139,49 @@ pub mod black_box_tests { #[test] fn empty_program_2() { - assert_eq!(compile_and_run::(";", "").unwrap(), ""); + assert_eq!(compile_and_run::("{}", "").unwrap(), ""); + } + + #[test] + fn empty_program_2a() { + assert_eq!( + compile_and_run::("{{{{}}}}", "").unwrap(), + "" + ); + } + + #[test] + fn empty_program_2b() { + assert_eq!( + compile_and_run::( + "{{}} {} {{{}{}}} {{{ { }{ }} {{ }{ }}} {{{ }{}}{{} 
{}}}}", + "" + ) + .unwrap(), + "" + ); } #[test] fn empty_program_3() { assert_eq!( - compile_and_run::(";;;;;;", "").unwrap(), + compile_and_run::(";", "").unwrap_err(), + "" + ); + } + + #[test] + fn empty_program_3a() { + assert_eq!( + compile_and_run::(";;;;;;", "").unwrap_err(), "" ); } #[test] - fn empty_program_4() { + fn empty_program_3b() { assert_eq!( - compile_and_run::(";;{;{;};};;;", "").unwrap(), + compile_and_run::(";;{;{;};};;;", "").unwrap_err(), "" ); } From 60b957e5ecba15233554ea35512dd8d79d50fc5e Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Fri, 7 Nov 2025 20:24:17 +1100 Subject: [PATCH 26/56] Add tests for expression parsing --- compiler/src/parser/tests.rs | 217 +++++++++++++++++++++++++++++++++-- 1 file changed, 210 insertions(+), 7 deletions(-) diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs index ad3c603..55708b3 100644 --- a/compiler/src/parser/tests.rs +++ b/compiler/src/parser/tests.rs @@ -3,18 +3,14 @@ mod parser_tests { use super::super::{ expressions::Expression, parser::parse_program, - tokeniser::Token, types::{ Clause, ExtendedOpcode, LocationSpecifier, VariableTarget, VariableTypeDefinition, VariableTypeReference, }, }; - use crate::{ - backend::{ - bf::{Opcode, TapeCell}, - bf2d::{Opcode2D, TapeCell2D}, - }, - macros::macros::r_assert, + use crate::backend::{ + bf::{Opcode, TapeCell}, + bf2d::{Opcode2D, TapeCell2D}, }; #[test] @@ -220,4 +216,211 @@ mod parser_tests { "" ); } + + #[test] + fn strings_1() { + assert!(parse_program::( + r#" +cell[5] ggghh = "hello"; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 5), + location_specifier: LocationSpecifier::None + }, + value: Expression::StringLiteral(String::from("hello")) + }])); + } + + #[test] + fn strings_1a() { + assert_eq!( + parse_program::( + r#" +cell[0] ggghh = ""; +"# + ) + 
.unwrap_err(), + "" + ); + } + + #[test] + fn strings_1b() { + assert!(parse_program::( + r#" +cell[1] ggghh = "hello"; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 1), + location_specifier: LocationSpecifier::None + }, + value: Expression::StringLiteral(String::from("hello")) + }])); + } + + #[test] + fn strings_2() { + assert!(parse_program::( + r#" +cell[6] ggghh = "hel'lo"; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 6), + location_specifier: LocationSpecifier::None + }, + value: Expression::StringLiteral(String::from("hel'lo")) + }])); + } + + #[test] + fn strings_3() { + assert!(parse_program::( + r#" +cell[7] ggghh = "\"hello\""; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 7), + location_specifier: LocationSpecifier::None + }, + value: Expression::StringLiteral(String::from("\"hello\"")) + }])); + } + + #[test] + fn arrays_1() { + assert!(parse_program::( + r#" +cell[0] ggghh = []; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 0), + location_specifier: LocationSpecifier::None + }, + value: Expression::ArrayLiteral(vec![]) + }])); + } + + #[test] + fn arrays_2() { + assert!(parse_program::( + r#" +cell[333] arr = [45, 53]; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 3), 
+ location_specifier: LocationSpecifier::None + }, + value: Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(45), + Expression::NaturalNumber(53) + ]) + }])); + } + + #[test] + fn arrays_3() { + assert!(parse_program::( + r#" +cell[3] arr = ['h', 53, (((4)))]; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 3), + location_specifier: LocationSpecifier::None + }, + value: Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(104), + Expression::NaturalNumber(53), + Expression::NaturalNumber(4) + ]) + }])); + } + + #[test] + fn arrays_4() { + assert!(parse_program::( + r#" +struct nonsense[39] arr @-56 = ["hello!", 53, [4,5,6]]; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 39), + location_specifier: LocationSpecifier::Cell(-56) + }, + value: Expression::ArrayLiteral(vec![ + Expression::StringLiteral(String::from("hello!")), + Expression::NaturalNumber(53), + Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(4), + Expression::NaturalNumber(5), + Expression::NaturalNumber(6) + ]) + ]) + }])); + } + + #[test] + fn arrays_5() { + assert!(parse_program::( + r#" +struct nonsense[39] arr @-56 = ["hello!", ',', [4,"hello comma: ,",6]]; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 39), + location_specifier: LocationSpecifier::Cell(-56) + }, + value: Expression::ArrayLiteral(vec![ + Expression::StringLiteral(String::from("hello!")), + Expression::NaturalNumber(44), + Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(4), + Expression::StringLiteral(String::from("hello comma: ,")), 
+ Expression::NaturalNumber(6) + ]) + ]) + }])); + } } From 0bbd3f8d6f52f7f5dce3f620996d296ec0d5a122 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Sun, 9 Nov 2025 10:07:16 +1100 Subject: [PATCH 27/56] Fix errors in new parser --- compiler/src/backend/bf.rs | 4 +- compiler/src/backend/bf2d.rs | 6 +- compiler/src/main.rs | 18 +-- compiler/src/parser/expressions.rs | 172 ++++++++++++++++------------- compiler/src/parser/parser.rs | 83 ++++++-------- compiler/src/parser/tests.rs | 116 ++++++++++++++++++- compiler/src/parser/tokeniser.rs | 2 + 7 files changed, 256 insertions(+), 145 deletions(-) diff --git a/compiler/src/backend/bf.rs b/compiler/src/backend/bf.rs index 99d0fa7..c5fc29d 100644 --- a/compiler/src/backend/bf.rs +++ b/compiler/src/backend/bf.rs @@ -36,8 +36,8 @@ impl OpcodeVariant for Opcode { Ok(match token { Token::Plus => Opcode::Add, Token::Minus => Opcode::Subtract, - Token::MoreThan => Opcode::Right, - Token::LessThan => Opcode::Left, + // Token::MoreThan => Opcode::Right, + // Token::LessThan => Opcode::Left, Token::OpenSquareBracket => Opcode::OpenLoop, Token::ClosingSquareBracket => Opcode::CloseLoop, Token::Dot => Opcode::Output, diff --git a/compiler/src/backend/bf2d.rs b/compiler/src/backend/bf2d.rs index a5a12a8..e8f34c4 100644 --- a/compiler/src/backend/bf2d.rs +++ b/compiler/src/backend/bf2d.rs @@ -41,13 +41,13 @@ impl OpcodeVariant for Opcode2D { Ok(match token { Token::Plus => Opcode2D::Add, Token::Minus => Opcode2D::Subtract, - Token::MoreThan => Opcode2D::Right, - Token::LessThan => Opcode2D::Left, + // Token::MoreThan => Opcode2D::Right, + // Token::LessThan => Opcode2D::Left, Token::OpenSquareBracket => Opcode2D::OpenLoop, Token::ClosingSquareBracket => Opcode2D::CloseLoop, Token::Dot => Opcode2D::Output, Token::Comma => Opcode2D::Input, - Token::Caret => Opcode2D::Up, + // Token::Caret => Opcode2D::Up, // TODO: implement this: // Token::Down => Opcode2D::Down, _ => r_panic!("Invalid token in inline Brainfuck: {token:?}"), diff 
--git a/compiler/src/main.rs b/compiler/src/main.rs index eea816f..a7ddcdb 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -11,7 +11,6 @@ mod misc; mod parser; mod preprocessor; mod tests; -mod tokeniser; use crate::{ backend::{ bf::{Opcode, TapeCell}, @@ -20,9 +19,8 @@ use crate::{ }, brainfuck::{BrainfuckConfig, BrainfuckContext}, misc::{MastermindConfig, MastermindContext}, - parser::parse, + parser::parser::parse_program, preprocessor::preprocess, - tokeniser::tokenise, }; // stdlib dependencies: @@ -82,30 +80,26 @@ fn main() -> Result<(), String> { config: MastermindConfig::new(args.optimise), }; - let program; - match args.file { + let program = match args.file { Some(file) => { let file_path = std::path::PathBuf::from(file); // c-style preprocessor (includes and maybe some simple conditionals to avoid double includes) - program = preprocess(file_path); - } - None => { - program = args.program.unwrap(); + preprocess(file_path) } + None => args.program.unwrap(), }; let bf_program = match args.compile { true => { // compile the provided file - let tokens = tokenise(&program)?; if ctx.config.enable_2d_grid { - let parsed_syntax = parse::(&tokens)?; + let parsed_syntax = parse_program::(&program)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; bf_code.to_string() } else { - let parsed_syntax = parse::(&tokens)?; + let parsed_syntax = parse_program::(&program)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; bf_code.to_string() diff --git a/compiler/src/parser/expressions.rs b/compiler/src/parser/expressions.rs index 34c580b..42671ab 100644 --- a/compiler/src/parser/expressions.rs +++ b/compiler/src/parser/expressions.rs @@ -1,8 +1,11 @@ -use std::num::Wrapping; - -use crate::macros::macros::r_panic; +use super::{ + parser::parse_var_target, + tokeniser::{next_token, Token}, + 
types::VariableTarget, +}; +use crate::macros::macros::{r_assert, r_panic}; -use super::types::VariableTarget; +use std::num::Wrapping; // TODO: add multiplication // yes, but no variable * variable multiplication or division @@ -38,71 +41,92 @@ impl Expression { // Iterators? // TODO: support post/pre increment in expressions pub fn parse(chars: &mut &[char]) -> Result { - let mut i = 0usize; - - if let Token::String(s) = &tokens[i] { - i += 1; - r_assert!( - i == tokens.len(), - "Expected semicolon after string literal {tokens:#?}" - ); - return Ok(Expression::StringLiteral(s.clone())); + { + let mut s = *chars; + if let Ok(Token::String(literal)) = next_token(&mut s) { + let Ok(Token::None) = next_token(&mut s) else { + // TODO: add source snippet + r_panic!("String literal must entirely comprise expression."); + }; + return Ok(Expression::StringLiteral(literal)); + } } - if let Token::OpenSquareBracket = &tokens[i] { - let braced_tokens = get_braced_tokens(&tokens[i..], SQUARE_BRACKETS)?; - i += 2 + braced_tokens.len(); - r_assert!( - i == tokens.len(), - "Expected semicolon after array literal {tokens:#?}" - ); - // parse the array - let results: Result, String> = braced_tokens - .split(|t| if let Token::Comma = t { true } else { false }) - .map(Self::parse) - .collect(); - // TODO: why do I need to split collect result into a seperate variable like here? 
- return Ok(Expression::ArrayLiteral(results?)); + { + let mut s = *chars; + if let Ok(Token::OpenSquareBracket) = next_token(&mut s) { + *chars = s; + let mut expressions = vec![]; + loop { + expressions.push(Self::parse(chars)?); + match next_token(chars) { + Ok(Token::ClosingSquareBracket) => break, + Ok(Token::Comma) => { + *chars = s; + } + _ => r_panic!("Unexpected token in array literal."), + } + } + s = *chars; + // check for delimiters + let Ok( + Token::Semicolon + | Token::Comma + | Token::ClosingParenthesis + | Token::ClosingSquareBracket + | Token::None, + ) = next_token(&mut s) + else { + // TODO: add source snippet + r_panic!("Array literal must entirely comprise expression."); + }; + return Ok(Expression::ArrayLiteral(expressions)); + } } + // this loop is basically a state machine based on the current sign: let mut current_sign = Some(Sign::Positive); // by default the first summand is positive let mut summands = Vec::new(); - while i < tokens.len() { - match (¤t_sign, &tokens[i]) { - (None, Token::Plus) => { + loop { + let mut s = *chars; + match (¤t_sign, next_token(&mut s)) { + (None, Ok(Token::Plus)) => { + *chars = s; current_sign = Some(Sign::Positive); - i += 1; } - (None, Token::Minus) => { + (None, Ok(Token::Minus)) => { + *chars = s; current_sign = Some(Sign::Negative); - i += 1; } - (Some(Sign::Positive), Token::Minus) => { + (Some(Sign::Positive), Ok(Token::Minus)) => { + *chars = s; current_sign = Some(Sign::Negative); - i += 1; } - (Some(Sign::Negative), Token::Minus) => { + (Some(Sign::Negative), Ok(Token::Minus)) => { + *chars = s; current_sign = Some(Sign::Positive); - i += 1; } - (Some(sign), Token::Digits(literal)) => { - let parsed_int: usize = literal.parse().unwrap(); - i += 1; - match sign { - Sign::Positive => summands.push(Expression::NaturalNumber(parsed_int)), - Sign::Negative => summands.push(Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::NaturalNumber(parsed_int)], - }), - } - 
current_sign = None; - } - (Some(sign), token @ (Token::True | Token::False)) => { + ( + Some(sign), + Ok( + token @ (Token::Digits(_) + | Token::Character(_) + | Token::True + | Token::False), + ), + ) => { + *chars = s; let parsed_int = match token { + Token::Digits(digits) => digits.parse::().unwrap(), + Token::Character(c) => { + let chr_int = c as usize; + r_assert!(chr_int < 0xff, "Character tokens must be single-byte: {c}"); + chr_int + } Token::True => 1, - Token::False | _ => 0, + Token::False => 0, + _ => unreachable!(), }; - i += 1; summands.push(match sign { Sign::Positive => Expression::NaturalNumber(parsed_int), Sign::Negative => Expression::SumExpression { @@ -112,27 +136,8 @@ impl Expression { }); current_sign = None; } - (Some(sign), Token::Character(chr)) => { - let chr_int: usize = *chr as usize; - - r_assert!( - chr_int < 0xff, - "Character tokens must be single-byte: {chr}" - ); - - i += 1; - summands.push(match sign { - Sign::Positive => Expression::NaturalNumber(chr_int), - Sign::Negative => Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::NaturalNumber(chr_int)], - }, - }); - current_sign = None; - } - (Some(sign), Token::Name(_) | Token::Asterisk) => { - let (var, len) = parse_var_target(&tokens[i..])?; - i += len; + (Some(sign), Ok(Token::Name(_) | Token::Asterisk)) => { + let var = parse_var_target(chars)?; summands.push(match sign { Sign::Positive => Expression::VariableReference(var), Sign::Negative => Expression::SumExpression { @@ -142,10 +147,9 @@ impl Expression { }); current_sign = None; } - (Some(sign), Token::OpenParenthesis) => { - let braced_tokens = get_braced_tokens(&tokens[i..], PARENTHESES)?; - i += 2 + braced_tokens.len(); - let braced_expr = Self::parse(braced_tokens)?; + (Some(sign), Ok(Token::OpenParenthesis)) => { + *chars = s; + let braced_expr = Self::parse(chars)?; // probably inefficent but everything needs to be flattened at some point anyway so won't matter // TODO: make expression 
structure more efficient (don't use vectors every time there is a negative) summands.push(match (sign, braced_expr.clone()) { @@ -180,6 +184,20 @@ impl Expression { }); current_sign = None; } + // TODO: add delimiters here: `)` `;` `,` `{` `into` + ( + sign, + Ok( + Token::ClosingParenthesis + | Token::Semicolon + | Token::Comma + | Token::OpenBrace + | Token::Into, + ), + ) => { + r_assert!(sign.is_none(), "Expected more terms in expression."); + break; + } // TODO: add source snippet token => r_panic!("Unexpected token {token:#?} found in expression."), } diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 28262cc..386e1e2 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -1,8 +1,6 @@ use super::{ expressions::Expression, - tokeniser::{ - find_and_advance, find_next, find_next_whitespace, next_token, skip_whitespace, Token, - }, + tokeniser::{next_token, Token}, types::{ Clause, LocationSpecifier, Reference, TapeCellLocation, VariableTarget, VariableTargetReferenceChain, VariableTypeReference, @@ -153,7 +151,7 @@ fn parse_var_type_definition( { let mut s = *chars; while let Ok(Token::OpenSquareBracket) = next_token(&mut s) { - var_type = VariableTypeReference::Array(Box::new(var_type), parse_array_length(chars)?); + var_type = VariableTypeReference::Array(Box::new(var_type), parse_subscript(chars)?); s = chars; } } @@ -185,21 +183,11 @@ fn parse_subscript(chars: &mut &[char]) -> Result { // TODO: add program snippet r_panic!("Expected `]` in array subscript."); }; - // TODO: fix duplicate error here - digits - .parse::() - .map_err(|_| format!("Expected natural number in array subscript.")) -} - -/// parse_array_subscript but with a length check -fn parse_array_length(chars: &mut &[char]) -> Result { - let len = parse_subscript(chars)?; - // TODO: add source snippet - r_assert!(len > 0, "Array variable cannot be zero-length."); - Ok(len) + // TODO: handle errors here + Ok(digits.parse::().unwrap()) } 
-fn parse_var_target(chars: &mut &[char]) -> Result { +pub fn parse_var_target(chars: &mut &[char]) -> Result { let is_spread = { let mut s = *chars; if let Ok(Token::Asterisk) = next_token(&mut s) { @@ -261,15 +249,13 @@ fn parse_integer(chars: &mut &[char]) -> Result { // TODO: add source snippet r_panic!("Expected integer.") }; - digits - .parse::() - .map(|magnitude| match is_negative { - // TODO: truncation error handling - false => magnitude as i32, - true => -(magnitude as i32), - }) - // TODO: fix duplicate error here - .map_err(|_| format!("Expected integer.")) + // TODO: handle errors here + let magnitude = digits.parse::().unwrap(); + Ok(match is_negative { + // TODO: truncation error handling + false => magnitude as i32, + true => -(magnitude as i32), + }) } fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; LENGTH], String> { @@ -310,27 +296,26 @@ fn parse_if_else_clause( }; let is_not = { - let s = chars; - if let Ok(Token::Not) = next_token(s) { + let mut s = *chars; + if let Ok(Token::Not) = next_token(&mut s) { *chars = s; true } else { false } }; - - let Ok(condition_char_len) = find_next(chars, '{') else { - // TODO: add program snippet to errors - r_panic!("Expected code block in if-else clause."); - }; - let condition = Expression::parse(&chars[..condition_char_len])?; - *chars = &chars[condition_char_len..]; - + let condition = Expression::parse(chars)?; + { + let mut s = *chars; + let Ok(Token::OpenBrace) = next_token(&mut s) else { + r_panic!("Expected code block in if-else clause."); + }; + } let block_one = parse_block(chars)?; let block_two = { - let mut s = chars; - if let Ok(Token::Else) = next_token(s) { + let mut s = *chars; + if let Ok(Token::Else) = next_token(&mut s) { *chars = s; Some(parse_block(chars)?) 
} else { @@ -368,18 +353,18 @@ fn parse_while_clause( r_panic!("Expected `while` in while clause."); }; - let Ok(condition_char_len) = find_next(chars, '{') else { - // TODO: add program snippet to errors - r_panic!("Expected code block in while clause."); - }; - let condition = Expression::parse(&chars[..condition_char_len])?; - *chars = &chars[condition_char_len..]; - + let condition = Expression::parse(chars)?; // TODO: make while loops support expressions let Expression::VariableReference(condition_variable) = condition else { r_panic!("While clause expected variable target condition."); }; + { + let mut s = *chars; + let Ok(Token::OpenBrace) = next_token(&mut s) else { + r_panic!("Expected code block in while clause."); + }; + } let loop_block = parse_block(chars)?; Ok(Clause::While { @@ -471,10 +456,10 @@ fn parse_struct_definition_clause( fn parse_let_clause(chars: &mut &[char]) -> Result, String> { let var = parse_var_type_definition(chars)?; - let mut s = chars; - if let Ok(Token::EqualsSign) = next_token(s) { - chars = s; - let expr = Expression::parse(find_and_advance(chars, ';'))?; + let mut s = *chars; + if let Ok(Token::EqualsSign) = next_token(&mut s) { + *chars = s; + let expr = Expression::parse(chars)?; let Ok(Token::Semicolon) = next_token(chars) else { r_panic!("Expected semicolon after variable definition."); }; diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs index 55708b3..f1a4e77 100644 --- a/compiler/src/parser/tests.rs +++ b/compiler/src/parser/tests.rs @@ -1,7 +1,7 @@ #[cfg(test)] mod parser_tests { use super::super::{ - expressions::Expression, + expressions::{Expression, Sign}, parser::parse_program, types::{ Clause, ExtendedOpcode, LocationSpecifier, VariableTarget, VariableTypeDefinition, @@ -337,7 +337,7 @@ cell[333] arr = [45, 53]; .eq(&[Clause::DefineVariable { var: VariableTypeDefinition { name: String::from("arr"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 3), + 
var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 333), location_specifier: LocationSpecifier::None }, value: Expression::ArrayLiteral(vec![ @@ -347,6 +347,73 @@ cell[333] arr = [45, 53]; }])); } + #[test] + fn arrays_2a() { + assert!(parse_program::( + r#" +cell[333] arr = [45 + 123, 53]; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 333), + location_specifier: LocationSpecifier::None + }, + value: Expression::ArrayLiteral(vec![ + Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::NaturalNumber(45), + Expression::NaturalNumber(123) + ] + }, + Expression::NaturalNumber(53) + ]) + }])); + } + + #[test] + fn arrays_2b() { + assert!(parse_program::( + r#" +cell[333] arr = [45 + 123, -(53 + 0+78-9)]; +"# + ) + .unwrap() + .iter() + .eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 333), + location_specifier: LocationSpecifier::None + }, + value: Expression::ArrayLiteral(vec![ + Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::NaturalNumber(45), + Expression::NaturalNumber(123) + ] + }, + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![ + Expression::NaturalNumber(53), + Expression::NaturalNumber(0), + Expression::NaturalNumber(78), + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(9)] + } + ] + } + ]) + }])); + } + #[test] fn arrays_3() { assert!(parse_program::( @@ -423,4 +490,49 @@ struct nonsense[39] arr @-56 = ["hello!", ',', [4,"hello comma: ,",6]]; ]) }])); } + + #[test] + fn sums_1() { + assert!(parse_program::( + r#" +struct nonsense[39] arr @-56 = 56 - ( 4+3+( -7-5 +(6)-(((( (0) )))) ) ); +"# + ) + .unwrap() + .iter() + 
.eq(&[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 39), + location_specifier: LocationSpecifier::Cell(-56) + }, + value: Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::NaturalNumber(56), + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![ + Expression::NaturalNumber(4), + Expression::NaturalNumber(3), + Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(7)] + }, + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(5)] + }, + Expression::NaturalNumber(6) + ] + } + ] + } + ] + } + }])); + } } diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index dea39eb..bcef58a 100644 --- a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -83,6 +83,7 @@ pub fn next_token(chars: &mut &[char]) -> Result { }) } +// TODO: fix this, make this based on token, currently it has no nuance for strings for example // TODO: figure out errors for these helper functions pub fn find_next(chars: &[char], character: char) -> Result { let mut i = 0; @@ -99,6 +100,7 @@ pub fn find_next(chars: &[char], character: char) -> Result { Ok(i) } +// TODO: fix this, make this based on token, currently it has no nuance for strings for example pub fn find_and_advance<'a>(chars: &'a mut &[char], character: char) -> Result<&'a [char], ()> { let substr_len = find_next(chars, character)?; let chars_before = &chars[..substr_len]; From 6e7f270232316db495457771a31c182fa678771e Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Sun, 9 Nov 2025 10:17:42 +1100 Subject: [PATCH 28/56] Implement parse_block and add tests for it --- compiler/src/parser/parser.rs | 27 ++++++++++++++++++++--- compiler/src/parser/tests.rs | 41 
+++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 386e1e2..422a70e 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -57,8 +57,29 @@ fn parse_clause( }) } -fn parse_block(chars: &mut &[char]) -> Result>, String> { - todo!() +fn parse_block( + chars: &mut &[char], +) -> Result>, String> { + let Ok(Token::OpenBrace) = next_token(chars) else { + r_panic!("Expected `{{` in code block."); + }; + + let mut clauses = vec![]; + loop { + { + let mut s = *chars; + if let Ok(Token::ClosingBrace) = next_token(&mut s) { + break; + } + } + let clause = parse_clause(chars)?; + if let Clause::None = clause { + break; + } + clauses.push(clause); + } + + Ok(clauses) } //////////////////////////// @@ -287,7 +308,7 @@ fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; //////////////////////////// //////////////////////////// -fn parse_if_else_clause( +fn parse_if_else_clause( chars: &mut &[char], ) -> Result, String> { let Ok(Token::If) = next_token(chars) else { diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs index f1a4e77..b42841d 100644 --- a/compiler/src/parser/tests.rs +++ b/compiler/src/parser/tests.rs @@ -535,4 +535,45 @@ struct nonsense[39] arr @-56 = 56 - ( 4+3+( -7-5 +(6)-(((( (0) )))) ) ); } }])); } + + #[test] + fn blocks_1() { + assert!(parse_program::("{}") + .unwrap() + .iter() + .eq(&[Clause::Block(vec![])])); + } + + #[test] + fn blocks_1a() { + assert!(parse_program::(" {}{} {} {} ") + .unwrap() + .iter() + .eq(&[Clause::Block(vec![])])); + } + + #[test] + fn blocks_2() { + assert!( + parse_program::("{output 1;output 2;}{{{} output 3;}}") + .unwrap() + .iter() + .eq(&[ + Clause::Block(vec![ + Clause::OutputValue { + value: Expression::NaturalNumber(1), + }, + Clause::OutputValue { + value: Expression::NaturalNumber(2), + } + ]), + Clause::Block(vec![Clause::Block(vec![ + 
Clause::Block(vec![]), + Clause::OutputValue { + value: Expression::NaturalNumber(3) + } + ])]) + ]) + ); + } } From a456533703a9e3af01959bd20c1e779524aade12 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Sun, 9 Nov 2025 10:33:10 +1100 Subject: [PATCH 29/56] Add more tests for parsing and tokenising --- compiler/src/parser/tests.rs | 23 ++++++- compiler/src/parser/tokeniser.rs | 100 +++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 1 deletion(-) diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs index b42841d..f7f5d43 100644 --- a/compiler/src/parser/tests.rs +++ b/compiler/src/parser/tests.rs @@ -549,7 +549,28 @@ struct nonsense[39] arr @-56 = 56 - ( 4+3+( -7-5 +(6)-(((( (0) )))) ) ); assert!(parse_program::(" {}{} {} {} ") .unwrap() .iter() - .eq(&[Clause::Block(vec![])])); + .eq(&[ + Clause::Block(vec![]), + Clause::Block(vec![]), + Clause::Block(vec![]), + Clause::Block(vec![]) + ])); + } + + #[test] + fn blocks_1b() { + assert!(parse_program::(" {}{{{{}}{}}} {} {} ") + .unwrap() + .iter() + .eq(&[ + Clause::Block(vec![]), + Clause::Block(vec![Clause::Block(vec![ + Clause::Block(vec![Clause::Block(vec![])]), + Clause::Block(vec![]) + ])]), + Clause::Block(vec![]), + Clause::Block(vec![]) + ])); } #[test] diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index bcef58a..967f461 100644 --- a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -166,6 +166,106 @@ mod tokeniser_tests { assert!(actual_output.iter().eq(desired_output)); } + #[test] + fn keywords_1() { + _tokenisation_test( + "while output input if", + &[Token::While, Token::Output, Token::Input, Token::If], + ); + } + + #[test] + fn keywords_2() { + _tokenisation_test( + "into clobbers assert bf else;;;;", + &[ + Token::Into, + Token::Clobbers, + Token::Assert, + Token::Bf, + Token::Else, + Token::Semicolon, + Token::Semicolon, + Token::Semicolon, + Token::Semicolon, + ], + ); + } + + #[test] + fn 
names_1() { + _tokenisation_test("i", &[Token::Name(String::from("i"))]); + } + + #[test] + fn names_1a() { + _tokenisation_test("_", &[Token::Name(String::from("_"))]); + } + + #[test] + fn names_2() { + _tokenisation_test( + "while hello", + &[Token::While, Token::Name(String::from("hello"))], + ); + } + + #[test] + fn names_2a() { + _tokenisation_test( + "while_", + &[Token::While, Token::Name(String::from("while_"))], + ); + } + + #[test] + fn names_2b() { + _tokenisation_test( + "if_else_while_hello;welcome\ninto the if club", + &[ + Token::Name(String::from("if_else_while_hello")), + Token::Semicolon, + Token::Name(String::from("welcome")), + Token::Into, + Token::Name(String::from("the")), + Token::If, + Token::Name(String::from("club")), + ], + ); + } + + #[test] + fn names_2c() { + _tokenisation_test( + "hello{If;elSe ___if}\n\n\nclobberss", + &[ + Token::Name(String::from("hello")), + Token::OpenBrace, + Token::Name(String::from("If")), + Token::Semicolon, + Token::Name(String::from("elSe")), + Token::Name(String::from("___if")), + Token::ClosingBrace, + Token::Name(String::from("clobberss")), + ], + ); + } + + #[test] + fn names_2d() { + _tokenisation_test( + "hello while you were gone I", + &[ + Token::Name(String::from("hello")), + Token::While, + Token::Name(String::from("you")), + Token::Name(String::from("were")), + Token::Name(String::from("gone")), + Token::Name(String::from("I")), + ], + ); + } + #[test] fn character_literals_1() { _tokenisation_test( From b44b1889fce9879a4b82ced1543c1a97b890b1ca Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 08:56:01 +1100 Subject: [PATCH 30/56] Add tests for tokeniser and increment --- compiler/src/parser/tokeniser.rs | 298 +++++++++++++++++++++++++------ compiler/src/tests.rs | 116 ++++++++++++ 2 files changed, 360 insertions(+), 54 deletions(-) diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index 967f461..eb9a596 100644 --- 
a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -13,6 +13,16 @@ pub enum Token { If, Not, Else, + Copy, + Drain, + Into, + Bf, + Clobbers, + Assert, + Equals, + Unknown, + True, + False, OpenBrace, ClosingBrace, OpenSquareBracket, @@ -23,22 +33,14 @@ pub enum Token { Dot, Asterisk, At, - Copy, - Drain, - Into, - Bf, - Clobbers, - Assert, - Equals, - Unknown, Name(String), Digits(String), String(String), Character(char), - True, - False, - Minus, Plus, + Minus, + PlusPlus, + MinusMinus, PlusEquals, MinusEquals, EqualsSign, @@ -48,37 +50,57 @@ pub enum Token { /// Get the next token from chars, advance the passed in pointer pub fn next_token(chars: &mut &[char]) -> Result { // skip any whitespace - skip_whitespace(chars)?; - - // TODO: this is flawed, what about cell g=5;? - let token_len = find_next_whitespace(*chars)?; - - Ok(match token_len { - 0 => return Err(()), - 1 => match chars[0] { - '{' => Token::OpenBrace, - '}' => Token::ClosingBrace, - _ => todo!(), - }, - 2 => match chars[0..2] { - ['b', 'f'] => Token::Bf, - ['i', 'f'] => Token::If, - _ => todo!(), - }, - 3 => match chars[0..3] { - ['n', 'o', 't'] => Token::Not, - _ => todo!(), - }, - 4 => match chars[0..4] { - ['c', 'e', 'l', 'l'] => Token::Cell, - ['e', 'l', 's', 'e'] => Token::Else, - ['t', 'r', 'u', 'e'] => Token::True, - _ => todo!(), - }, - 5 => match chars[0..5] { - ['w', 'h', 'i', 'l', 'e'] => Token::While, - _ => todo!(), - }, + skip_whitespace(chars); + + // read the first character and branch from there + let Some(c) = chars.get(0) else { + return Ok(Token::None); + }; + Ok(match *c { + ';' => { + *chars = &chars[1..]; + Token::Semicolon + } + '{' => { + *chars = &chars[1..]; + Token::OpenBrace + } + '}' => { + *chars = &chars[1..]; + Token::ClosingBrace + } + '(' => { + *chars = &chars[1..]; + Token::OpenParenthesis + } + ')' => { + *chars = &chars[1..]; + Token::ClosingParenthesis + } + '[' => { + *chars = &chars[1..]; + Token::OpenSquareBracket + } + ']' => { 
+ *chars = &chars[1..]; + Token::ClosingSquareBracket + } + '.' => { + *chars = &chars[1..]; + Token::Dot + } + ',' => { + *chars = &chars[1..]; + Token::Comma + } + '*' => { + *chars = &chars[1..]; + Token::Asterisk + } + '@' => { + *chars = &chars[1..]; + Token::At + } _ => todo!(), }) } @@ -108,18 +130,18 @@ pub fn find_and_advance<'a>(chars: &'a mut &[char], character: char) -> Result<& Ok(chars_before) } -pub fn skip_whitespace(chars: &mut &[char]) -> Result<(), ()> { +pub fn skip_whitespace(chars: &mut &[char]) { loop { - let Some(c) = chars.get(0) else { - return Err(()); - }; - - if !c.is_whitespace() { - break; + match chars.get(0) { + Some(c) => { + if !c.is_whitespace() { + break; + } + } + None => break, } *chars = &chars[1..]; } - Ok(()) } pub fn find_next_whitespace(chars: &[char]) -> Result { @@ -149,7 +171,7 @@ mod tokeniser_tests { let mut tokens = vec![]; loop { let Ok(token) = next_token(&mut chars_slice) else { - r_panic!("Invlid token in input."); + r_panic!("Invalid token in input."); }; if let Token::None = token { break; @@ -167,10 +189,178 @@ mod tokeniser_tests { } #[test] - fn keywords_1() { + fn single_tokens() { + _tokenisation_test( + "==;;**@@[[{{((]]}}))..,,", + &[ + Token::EqualsSign, + Token::EqualsSign, + Token::Semicolon, + Token::Semicolon, + Token::Asterisk, + Token::Asterisk, + Token::At, + Token::At, + Token::OpenSquareBracket, + Token::OpenSquareBracket, + Token::OpenBrace, + Token::OpenBrace, + Token::OpenParenthesis, + Token::OpenParenthesis, + Token::ClosingSquareBracket, + Token::ClosingSquareBracket, + Token::ClosingBrace, + Token::ClosingBrace, + Token::ClosingParenthesis, + Token::ClosingParenthesis, + Token::Dot, + Token::Dot, + Token::Comma, + Token::Comma, + ], + ); + } + + #[test] + fn double_tokens_1() { + _tokenisation_test( + "+=+=-=-=++++----", + &[ + Token::PlusEquals, + Token::PlusEquals, + Token::MinusEquals, + Token::MinusEquals, + Token::PlusPlus, + Token::PlusPlus, + Token::MinusMinus, + 
Token::MinusMinus, + ], + ); + } + + #[test] + fn double_tokens_2() { + _tokenisation_test( + "-++=+++=+-=--=---=-+++++-+-----", + &[ + Token::Minus, + Token::PlusPlus, + Token::EqualsSign, + Token::PlusPlus, + Token::PlusEquals, + Token::Plus, + Token::MinusEquals, + Token::MinusMinus, + Token::EqualsSign, + Token::MinusMinus, + Token::MinusEquals, + Token::Minus, + Token::PlusPlus, + Token::PlusPlus, + Token::Plus, + Token::Minus, + Token::Plus, + Token::MinusMinus, + Token::MinusMinus, + Token::Minus, + ], + ); + } + + #[test] + fn single_and_double() { _tokenisation_test( - "while output input if", - &[Token::While, Token::Output, Token::Input, Token::If], + "=+==;+=-=;*---=++*@@[[{{++((]--]}+-+})).---.,,", + &[ + Token::EqualsSign, + Token::PlusEquals, + Token::EqualsSign, + Token::Semicolon, + Token::PlusEquals, + Token::MinusEquals, + Token::Semicolon, + Token::Asterisk, + Token::MinusMinus, + Token::MinusEquals, + Token::PlusPlus, + Token::Asterisk, + Token::At, + Token::At, + Token::OpenSquareBracket, + Token::OpenSquareBracket, + Token::OpenBrace, + Token::OpenBrace, + Token::PlusPlus, + Token::OpenParenthesis, + Token::OpenParenthesis, + Token::ClosingSquareBracket, + Token::MinusMinus, + Token::ClosingSquareBracket, + Token::ClosingBrace, + Token::Plus, + Token::Minus, + Token::Plus, + Token::ClosingBrace, + Token::ClosingParenthesis, + Token::ClosingParenthesis, + Token::Dot, + Token::MinusMinus, + Token::Minus, + Token::Dot, + Token::Comma, + Token::Comma, + ], + ); + } + + #[test] + fn keywords() { + _tokenisation_test( + r#" +output output input input fn fn cell cell struct struct while while if +if not not else else copy copy drain drain into into bf bf clobbers clobbers + assert assert equals equals unknown unknown true true false false +"#, + &[ + Token::Output, + Token::Output, + Token::Input, + Token::Input, + Token::Fn, + Token::Fn, + Token::Cell, + Token::Cell, + Token::Struct, + Token::Struct, + Token::While, + Token::While, + Token::If, + 
Token::If, + Token::Not, + Token::Not, + Token::Else, + Token::Else, + Token::Copy, + Token::Copy, + Token::Drain, + Token::Drain, + Token::Into, + Token::Into, + Token::Bf, + Token::Bf, + Token::Clobbers, + Token::Clobbers, + Token::Assert, + Token::Assert, + Token::Equals, + Token::Equals, + Token::Unknown, + Token::Unknown, + Token::True, + Token::True, + Token::False, + Token::False, + ], ); } diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index 170ac7a..95ffb7b 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -527,6 +527,122 @@ output x + 'f'; assert_eq!(run_code(BVM_CONFIG_1D, &code, "", None).unwrap(), "f"); } + #[test] + fn increment_1() { + let program = r#" +cell x = 'h'; +output x; +++x; +output x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hi" + ) + } + + #[test] + fn increment_2() { + let program = r#" +cell x = 'h'; +output x; +--x; +output x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "hg" + ) + } + + // TODO: add pre-increment to expressions? 
(probably not worth it) + #[test] + #[ignore] + fn increment_3() { + let program = r#" +cell x = 'a'; +output ++x; +output x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "bb" + ) + } + + #[test] + #[ignore] + fn increment_3a() { + let program = r#" +cell x = 'a'; +output x; +output ++x + 2; +output x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "adb" + ) + } + + #[test] + #[ignore] + fn increment_3b() { + let program = r#" +cell x = 'd'; +output --x; +output x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "cc" + ) + } + + #[test] + #[ignore] + fn increment_3c() { + let program = r#" +cell x = 'd'; +output 4+--x; +output --x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "gb" + ) + } + + #[test] + #[ignore] + fn increment_4() { + let program = r#" +cell x = -1; +if ++x {output 'T';} +else {output 'F';} +output 'e' + ++x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Ff" + ) + } + + #[test] + #[ignore] + fn increment_4a() { + let program = r#" +cell x = 0; +if --x {output 'T';} +else {output 'F';} +output 'e' + x; +"#; + assert_eq!( + compile_and_run::(program, "").unwrap(), + "Td" + ) + } + #[test] fn loops_1() { let program = r#" From 7d54f462274ccd80b7b1a5f12de7440ce6d31052 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 09:32:18 +1100 Subject: [PATCH 31/56] Update Token names and tokeniser tests --- compiler/src/backend/bf.rs | 4 +- compiler/src/backend/bf2d.rs | 4 +- compiler/src/parser/expressions.rs | 14 +- compiler/src/parser/parser.rs | 34 ++-- compiler/src/parser/tokeniser.rs | 255 +++++++++++++++++++++-------- 5 files changed, 214 insertions(+), 97 deletions(-) diff --git a/compiler/src/backend/bf.rs b/compiler/src/backend/bf.rs index c5fc29d..261a881 100644 --- a/compiler/src/backend/bf.rs +++ b/compiler/src/backend/bf.rs @@ -38,8 +38,8 @@ impl OpcodeVariant for Opcode { Token::Minus => Opcode::Subtract, // Token::MoreThan => Opcode::Right, 
// Token::LessThan => Opcode::Left, - Token::OpenSquareBracket => Opcode::OpenLoop, - Token::ClosingSquareBracket => Opcode::CloseLoop, + Token::LeftSquareBracket => Opcode::OpenLoop, + Token::RightSquareBracket => Opcode::CloseLoop, Token::Dot => Opcode::Output, Token::Comma => Opcode::Input, _ => r_panic!("Invalid token in inline Brainfuck: {token:?}"), diff --git a/compiler/src/backend/bf2d.rs b/compiler/src/backend/bf2d.rs index e8f34c4..b85147f 100644 --- a/compiler/src/backend/bf2d.rs +++ b/compiler/src/backend/bf2d.rs @@ -43,8 +43,8 @@ impl OpcodeVariant for Opcode2D { Token::Minus => Opcode2D::Subtract, // Token::MoreThan => Opcode2D::Right, // Token::LessThan => Opcode2D::Left, - Token::OpenSquareBracket => Opcode2D::OpenLoop, - Token::ClosingSquareBracket => Opcode2D::CloseLoop, + Token::LeftSquareBracket => Opcode2D::OpenLoop, + Token::RightSquareBracket => Opcode2D::CloseLoop, Token::Dot => Opcode2D::Output, Token::Comma => Opcode2D::Input, // Token::Caret => Opcode2D::Up, diff --git a/compiler/src/parser/expressions.rs b/compiler/src/parser/expressions.rs index 42671ab..1a5b7b1 100644 --- a/compiler/src/parser/expressions.rs +++ b/compiler/src/parser/expressions.rs @@ -54,13 +54,13 @@ impl Expression { { let mut s = *chars; - if let Ok(Token::OpenSquareBracket) = next_token(&mut s) { + if let Ok(Token::LeftSquareBracket) = next_token(&mut s) { *chars = s; let mut expressions = vec![]; loop { expressions.push(Self::parse(chars)?); match next_token(chars) { - Ok(Token::ClosingSquareBracket) => break, + Ok(Token::RightSquareBracket) => break, Ok(Token::Comma) => { *chars = s; } @@ -72,8 +72,8 @@ impl Expression { let Ok( Token::Semicolon | Token::Comma - | Token::ClosingParenthesis - | Token::ClosingSquareBracket + | Token::RightParenthesis + | Token::RightSquareBracket | Token::None, ) = next_token(&mut s) else { @@ -147,7 +147,7 @@ impl Expression { }); current_sign = None; } - (Some(sign), Ok(Token::OpenParenthesis)) => { + (Some(sign), 
Ok(Token::LeftParenthesis)) => { *chars = s; let braced_expr = Self::parse(chars)?; // probably inefficent but everything needs to be flattened at some point anyway so won't matter @@ -188,10 +188,10 @@ impl Expression { ( sign, Ok( - Token::ClosingParenthesis + Token::RightParenthesis | Token::Semicolon | Token::Comma - | Token::OpenBrace + | Token::LeftBrace | Token::Into, ), ) => { diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 422a70e..da6742a 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -48,7 +48,7 @@ fn parse_clause( r_panic!("Expected identifier after `struct` keyword."); }; match next_token(&mut s) { - Ok(Token::OpenBrace) => parse_struct_definition_clause(chars)?, + Ok(Token::LeftBrace) => parse_struct_definition_clause(chars)?, _ => parse_let_clause(chars)?, } } @@ -60,7 +60,7 @@ fn parse_clause( fn parse_block( chars: &mut &[char], ) -> Result>, String> { - let Ok(Token::OpenBrace) = next_token(chars) else { + let Ok(Token::LeftBrace) = next_token(chars) else { r_panic!("Expected `{{` in code block."); }; @@ -68,7 +68,7 @@ fn parse_block( loop { { let mut s = *chars; - if let Ok(Token::ClosingBrace) = next_token(&mut s) { + if let Ok(Token::RightBrace) = next_token(&mut s) { break; } } @@ -124,7 +124,7 @@ impl TapeCellLocation for TapeCell2D { *chars = s; match next_token(&mut s) { - Ok(Token::OpenParenthesis) => { + Ok(Token::LeftParenthesis) => { // parse a 2-tuple let tuple = parse_integer_tuple::<2>(chars)?; Ok(LocationSpecifier::Cell(TapeCell2D(tuple[0], tuple[1]))) @@ -171,7 +171,7 @@ fn parse_var_type_definition( // parse array specifiers { let mut s = *chars; - while let Ok(Token::OpenSquareBracket) = next_token(&mut s) { + while let Ok(Token::LeftSquareBracket) = next_token(&mut s) { var_type = VariableTypeReference::Array(Box::new(var_type), parse_subscript(chars)?); s = chars; } @@ -192,7 +192,7 @@ fn parse_var_type_definition( /// parse the subscript of an array variable, 
e.g. [4] [6] [0] /// must be compile-time constant fn parse_subscript(chars: &mut &[char]) -> Result { - let Ok(Token::OpenSquareBracket) = next_token(chars) else { + let Ok(Token::LeftSquareBracket) = next_token(chars) else { // TODO: add program snippet r_panic!("Expected `[` in array subscript."); }; @@ -200,7 +200,7 @@ fn parse_subscript(chars: &mut &[char]) -> Result { // TODO: add program snippet r_panic!("Expected natural number in array subscript."); }; - let Ok(Token::ClosingSquareBracket) = next_token(chars) else { + let Ok(Token::RightSquareBracket) = next_token(chars) else { // TODO: add program snippet r_panic!("Expected `]` in array subscript."); }; @@ -228,7 +228,7 @@ pub fn parse_var_target(chars: &mut &[char]) -> Result { let mut s = *chars; loop { match next_token(&mut s) { - Ok(Token::OpenSquareBracket) => { + Ok(Token::LeftSquareBracket) => { let index = parse_subscript(chars)?; ref_chain.push(Reference::Index(index)); } @@ -280,7 +280,7 @@ fn parse_integer(chars: &mut &[char]) -> Result { } fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; LENGTH], String> { - let Ok(Token::OpenParenthesis) = next_token(chars) else { + let Ok(Token::LeftParenthesis) = next_token(chars) else { // TODO: add source snippet r_panic!("Expected opening parenthesis in tuple.") }; @@ -296,7 +296,7 @@ fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; }; } } - let Ok(Token::ClosingParenthesis) = next_token(chars) else { + let Ok(Token::RightParenthesis) = next_token(chars) else { // TODO: add source snippet r_panic!("Expected closing parenthesis in tuple."); }; @@ -328,7 +328,7 @@ fn parse_if_else_clause( let condition = Expression::parse(chars)?; { let mut s = *chars; - let Ok(Token::OpenBrace) = next_token(&mut s) else { + let Ok(Token::LeftBrace) = next_token(&mut s) else { r_panic!("Expected code block in if-else clause."); }; } @@ -382,7 +382,7 @@ fn parse_while_clause( { let mut s = *chars; - let Ok(Token::OpenBrace) = next_token(&mut s) else { 
+ let Ok(Token::LeftBrace) = next_token(&mut s) else { r_panic!("Expected code block in while clause."); }; } @@ -407,7 +407,7 @@ fn parse_function_definition_clause( r_panic!("Expected name in function definition clause."); }; - let Ok(Token::OpenParenthesis) = next_token(chars) else { + let Ok(Token::LeftParenthesis) = next_token(chars) else { // TODO: add source snippet r_panic!("Expected argument list in function definition clause."); }; @@ -415,7 +415,7 @@ fn parse_function_definition_clause( loop { { let mut s = *chars; - if let Ok(Token::ClosingParenthesis) = next_token(&mut s) { + if let Ok(Token::RightParenthesis) = next_token(&mut s) { *chars = s; break; } @@ -423,7 +423,7 @@ fn parse_function_definition_clause( arguments.push(parse_var_type_definition(chars)?); match next_token(chars) { - Ok(Token::ClosingParenthesis) => break, + Ok(Token::RightParenthesis) => break, Ok(Token::Comma) => (), // TODO: add source snippet _ => r_panic!("Unexpected token in function argument list."), @@ -451,7 +451,7 @@ fn parse_struct_definition_clause( r_panic!("Expected name in struct definition."); }; - let Ok(Token::OpenBrace) = next_token(chars) else { + let Ok(Token::LeftBrace) = next_token(chars) else { // TODO: add source snippet r_panic!("Expected `{{` in struct clause."); }; @@ -464,7 +464,7 @@ fn parse_struct_definition_clause( // TODO: add source snippet r_panic!("Expected semicolon after struct definition field."); }; - if let Ok(Token::ClosingBrace) = next_token(chars) { + if let Ok(Token::RightBrace) = next_token(chars) { break; } } diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index eb9a596..00ce549 100644 --- a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -23,12 +23,12 @@ pub enum Token { Unknown, True, False, - OpenBrace, - ClosingBrace, - OpenSquareBracket, - ClosingSquareBracket, - OpenParenthesis, - ClosingParenthesis, + LeftBrace, + RightBrace, + LeftSquareBracket, + RightSquareBracket, + 
LeftParenthesis, + RightParenthesis, Comma, Dot, Asterisk, @@ -63,27 +63,27 @@ pub fn next_token(chars: &mut &[char]) -> Result { } '{' => { *chars = &chars[1..]; - Token::OpenBrace + Token::LeftBrace } '}' => { *chars = &chars[1..]; - Token::ClosingBrace + Token::RightBrace } '(' => { *chars = &chars[1..]; - Token::OpenParenthesis + Token::LeftParenthesis } ')' => { *chars = &chars[1..]; - Token::ClosingParenthesis + Token::RightParenthesis } '[' => { *chars = &chars[1..]; - Token::OpenSquareBracket + Token::LeftSquareBracket } ']' => { *chars = &chars[1..]; - Token::ClosingSquareBracket + Token::RightSquareBracket } '.' => { *chars = &chars[1..]; @@ -189,40 +189,63 @@ mod tokeniser_tests { } #[test] - fn single_tokens() { - _tokenisation_test( - "==;;**@@[[{{((]]}}))..,,", - &[ - Token::EqualsSign, - Token::EqualsSign, - Token::Semicolon, - Token::Semicolon, - Token::Asterisk, - Token::Asterisk, - Token::At, - Token::At, - Token::OpenSquareBracket, - Token::OpenSquareBracket, - Token::OpenBrace, - Token::OpenBrace, - Token::OpenParenthesis, - Token::OpenParenthesis, - Token::ClosingSquareBracket, - Token::ClosingSquareBracket, - Token::ClosingBrace, - Token::ClosingBrace, - Token::ClosingParenthesis, - Token::ClosingParenthesis, - Token::Dot, - Token::Dot, - Token::Comma, - Token::Comma, - ], - ); + fn empty_1() { + _tokenisation_test("", &[]); } #[test] - fn double_tokens_1() { + fn empty_1a() { + _tokenisation_test(" \n \t ", &[]); + } + + #[test] + fn empty_2() { + let chars_vec: Vec = "".chars().collect(); + let mut chars_slice = &chars_vec[..]; + assert_eq!(next_token(&mut chars_slice).unwrap(), Token::None); + } + + #[test] + fn empty_2a() { + let chars_vec: Vec = "\n \t \n ".chars().collect(); + let mut chars_slice = &chars_vec[..]; + assert_eq!(next_token(&mut chars_slice).unwrap(), Token::None); + } + + #[test] + fn single() { + let desired_output = [ + Token::EqualsSign, + Token::EqualsSign, + Token::Semicolon, + Token::Semicolon, + Token::Asterisk, + 
Token::Asterisk, + Token::At, + Token::At, + Token::LeftSquareBracket, + Token::LeftSquareBracket, + Token::LeftBrace, + Token::LeftBrace, + Token::LeftParenthesis, + Token::LeftParenthesis, + Token::RightSquareBracket, + Token::RightSquareBracket, + Token::RightBrace, + Token::RightBrace, + Token::RightParenthesis, + Token::RightParenthesis, + Token::Dot, + Token::Dot, + Token::Comma, + Token::Comma, + ]; + _tokenisation_test("==;;**@@[[{{((]]}}))..,,", &desired_output); + _tokenisation_test(" == ; ;**@ @[[ {{ ( (] ]}} )). ., ,", &desired_output); + } + + #[test] + fn double_1() { _tokenisation_test( "+=+=-=-=++++----", &[ @@ -239,7 +262,28 @@ mod tokeniser_tests { } #[test] - fn double_tokens_2() { + fn double_1a() { + _tokenisation_test( + "+ =+ = -= -=+ +++ - - --", + &[ + Token::Plus, + Token::EqualsSign, + Token::Plus, + Token::EqualsSign, + Token::MinusEquals, + Token::MinusEquals, + Token::Plus, + Token::PlusPlus, + Token::Plus, + Token::Minus, + Token::Minus, + Token::MinusMinus, + ], + ); + } + + #[test] + fn double_2() { _tokenisation_test( "-++=+++=+-=--=---=-+++++-+-----", &[ @@ -267,46 +311,81 @@ mod tokeniser_tests { ); } + #[test] + fn double_2a() { + _tokenisation_test( + "-+ +=+ ++=+-=-- =-- - =-+ +++ +-+-- - --", + &[ + Token::Minus, + Token::Plus, + Token::PlusEquals, + Token::Plus, + Token::Plus, + Token::PlusEquals, + Token::Plus, + Token::MinusEquals, + Token::MinusMinus, + Token::EqualsSign, + Token::MinusMinus, + Token::Minus, + Token::EqualsSign, + Token::Minus, + Token::Plus, + Token::PlusPlus, + Token::Plus, + Token::Plus, + Token::Minus, + Token::Plus, + Token::MinusMinus, + Token::Minus, + Token::MinusMinus, + ], + ); + } + #[test] fn single_and_double() { _tokenisation_test( - "=+==;+=-=;*---=++*@@[[{{++((]--]}+-+})).---.,,", + "=+==;+=- =;*---=++*@@[[{{+ +((]--]}+-+})).---.-,,", &[ Token::EqualsSign, Token::PlusEquals, Token::EqualsSign, Token::Semicolon, Token::PlusEquals, - Token::MinusEquals, + Token::Minus, + Token::EqualsSign, 
Token::Semicolon, Token::Asterisk, Token::MinusMinus, Token::MinusEquals, - Token::PlusPlus, + Token::Plus, + Token::Plus, Token::Asterisk, Token::At, Token::At, - Token::OpenSquareBracket, - Token::OpenSquareBracket, - Token::OpenBrace, - Token::OpenBrace, + Token::LeftSquareBracket, + Token::LeftSquareBracket, + Token::LeftBrace, + Token::LeftBrace, Token::PlusPlus, - Token::OpenParenthesis, - Token::OpenParenthesis, - Token::ClosingSquareBracket, + Token::LeftParenthesis, + Token::LeftParenthesis, + Token::RightSquareBracket, Token::MinusMinus, - Token::ClosingSquareBracket, - Token::ClosingBrace, + Token::RightSquareBracket, + Token::RightBrace, Token::Plus, Token::Minus, Token::Plus, - Token::ClosingBrace, - Token::ClosingParenthesis, - Token::ClosingParenthesis, + Token::RightBrace, + Token::RightParenthesis, + Token::RightParenthesis, Token::Dot, Token::MinusMinus, Token::Minus, Token::Dot, + Token::Minus, Token::Comma, Token::Comma, ], @@ -365,19 +444,57 @@ if not not else else copy copy drain drain into into bf bf clobbers clobbers } #[test] - fn keywords_2() { + fn keywords_and_simples() { _tokenisation_test( - "into clobbers assert bf else;;;;", + r#"unknown,assert,equals.into;struct)clobbers-- -+input+++not(else{ +if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, &[ + Token::Unknown, + Token::Comma, + Token::Assert, + Token::Comma, + Token::Equals, + Token::Dot, Token::Into, + Token::Semicolon, + Token::Struct, + Token::RightParenthesis, Token::Clobbers, - Token::Assert, - Token::Bf, + Token::MinusMinus, + Token::Minus, + Token::Plus, + Token::Input, + Token::Plus, + Token::PlusPlus, + Token::Not, + Token::LeftParenthesis, Token::Else, - Token::Semicolon, - Token::Semicolon, - Token::Semicolon, - Token::Semicolon, + Token::LeftBrace, + Token::If, + Token::Fn, + Token::LeftBrace, + Token::Output, + Token::RightParenthesis, + Token::True, + Token::RightParenthesis, + Token::False, + Token::Minus, + Token::While, + Token::Asterisk, + 
Token::At, + Token::Copy, + Token::At, + Token::PlusEquals, + Token::At, + Token::Drain, + Token::MinusEquals, + Token::Into, + Token::EqualsSign, + Token::RightSquareBracket, + Token::LeftSquareBracket, + Token::Bf, + Token::Dot, + Token::Cell, ], ); } @@ -430,12 +547,12 @@ if not not else else copy copy drain drain into into bf bf clobbers clobbers "hello{If;elSe ___if}\n\n\nclobberss", &[ Token::Name(String::from("hello")), - Token::OpenBrace, + Token::LeftBrace, Token::Name(String::from("If")), Token::Semicolon, Token::Name(String::from("elSe")), Token::Name(String::from("___if")), - Token::ClosingBrace, + Token::RightBrace, Token::Name(String::from("clobberss")), ], ); From 30ca1256c9de345ad6194ee12c34b5dee60f3bfc Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 10:10:24 +1100 Subject: [PATCH 32/56] Add character literals to new tokeniser --- compiler/src/parser/old_tokeniser.rs | 22 ----- compiler/src/parser/parser.rs | 135 +++++++++++++-------------- compiler/src/parser/tokeniser.rs | 119 +++++++++++++++-------- 3 files changed, 144 insertions(+), 132 deletions(-) diff --git a/compiler/src/parser/old_tokeniser.rs b/compiler/src/parser/old_tokeniser.rs index 45f8bdc..29e2208 100644 --- a/compiler/src/parser/old_tokeniser.rs +++ b/compiler/src/parser/old_tokeniser.rs @@ -143,28 +143,6 @@ fn strip_line(line: &str) -> String { .join(" ") } -/// handle character escape sequences -// supports Rust ASCII escapes -fn tokenise_raw_character_literal(raw: &str) -> Result { - let mut s_iter = raw.chars(); - Ok(match s_iter.next() { - Some('\\') => match s_iter.next() { - Some(c) => match c { - '\'' => '\'', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '0' => '\0', - _ => r_panic!("Invalid escape sequence in character literal: {raw}"), - }, - None => r_panic!("Expected escape sequence in character literal: {raw}"), - }, - Some(first_char) => first_char, - None => r_panic!("Character literal must be length 1: {raw}"), - }) -} - /// 
handle string escape sequences // supports Rust ASCII escapes fn tokenise_raw_string_literal(raw: &str) -> Result { diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index da6742a..fe91b2d 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -37,30 +37,30 @@ fn parse_clause( chars: &mut &[char], ) -> Result, String> { let mut s = *chars; - Ok(match next_token(&mut s) { - Ok(Token::None) => Clause::None, - Ok(Token::If) => parse_if_else_clause(chars)?, - Ok(Token::While) => parse_while_clause(chars)?, - Ok(Token::Fn) => parse_function_definition_clause(chars)?, - Ok(Token::Struct) => { - let Ok(Token::Name(_)) = next_token(&mut s) else { + Ok(match next_token(&mut s)? { + Token::None => Clause::None, + Token::If => parse_if_else_clause(chars)?, + Token::While => parse_while_clause(chars)?, + Token::Fn => parse_function_definition_clause(chars)?, + Token::Struct => { + let Token::Name(_) = next_token(&mut s)? else { // TODO: add source snippet r_panic!("Expected identifier after `struct` keyword."); }; - match next_token(&mut s) { - Ok(Token::LeftBrace) => parse_struct_definition_clause(chars)?, + match next_token(&mut s)? { + Token::LeftBrace => parse_struct_definition_clause(chars)?, _ => parse_let_clause(chars)?, } } - Ok(Token::Cell) => parse_let_clause(chars)?, - Err(()) | Ok(_) => r_panic!("Invalid starting token."), + Token::Cell => parse_let_clause(chars)?, + _ => r_panic!("Invalid starting token."), }) } fn parse_block( chars: &mut &[char], ) -> Result>, String> { - let Ok(Token::LeftBrace) = next_token(chars) else { + let Token::LeftBrace = next_token(chars)? else { r_panic!("Expected `{{` in code block."); }; @@ -68,7 +68,7 @@ fn parse_block( loop { { let mut s = *chars; - if let Ok(Token::RightBrace) = next_token(&mut s) { + if let Token::RightBrace = next_token(&mut s)? 
{ break; } } @@ -91,17 +91,15 @@ impl TapeCellLocation for TapeCell { chars: &mut &[char], ) -> Result, String> { let mut s = *chars; - let Ok(Token::At) = next_token(&mut s) else { + let Token::At = next_token(&mut s)? else { return Ok(LocationSpecifier::None); }; *chars = s; - match next_token(&mut s) { - Ok(Token::Minus | Token::Digits(_)) => { - Ok(LocationSpecifier::Cell(parse_integer(chars)?)) - } + match next_token(&mut s)? { + Token::Minus | Token::Digits(_) => Ok(LocationSpecifier::Cell(parse_integer(chars)?)), // variable location specifier: - Ok(Token::Name(_)) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), + Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), // TODO: add source snippet _ => r_panic!("Invalid location specifier.",), } @@ -118,23 +116,23 @@ impl TapeCellLocation for TapeCell2D { chars: &mut &[char], ) -> Result, String> { let mut s = *chars; - let Ok(Token::At) = next_token(&mut s) else { + let Token::At = next_token(&mut s)? else { return Ok(LocationSpecifier::None); }; *chars = s; - match next_token(&mut s) { - Ok(Token::LeftParenthesis) => { + match next_token(&mut s)? 
{ + Token::LeftParenthesis => { // parse a 2-tuple let tuple = parse_integer_tuple::<2>(chars)?; Ok(LocationSpecifier::Cell(TapeCell2D(tuple[0], tuple[1]))) } - Ok(Token::Minus | Token::Digits(_)) => Ok(LocationSpecifier::Cell(TapeCell2D( + Token::Minus | Token::Digits(_) => Ok(LocationSpecifier::Cell(TapeCell2D( parse_integer(chars)?, 0, ))), // variable location specifier: - Ok(Token::Name(_)) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), + Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), // TODO: add source snippet _ => r_panic!("Invalid location specifier."), } @@ -152,10 +150,10 @@ impl TapeCellLocation for TapeCell2D { fn parse_var_type_definition( chars: &mut &[char], ) -> Result, String> { - let mut var_type = match next_token(chars) { - Ok(Token::Cell) => VariableTypeReference::Cell, - Ok(Token::Struct) => { - let Ok(Token::Name(struct_name)) = next_token(chars) else { + let mut var_type = match next_token(chars)? { + Token::Cell => VariableTypeReference::Cell, + Token::Struct => { + let Token::Name(struct_name) = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected struct type name in variable definition."); }; @@ -171,13 +169,13 @@ fn parse_var_type_definition( // parse array specifiers { let mut s = *chars; - while let Ok(Token::LeftSquareBracket) = next_token(&mut s) { + while let Token::LeftSquareBracket = next_token(&mut s)? { var_type = VariableTypeReference::Array(Box::new(var_type), parse_subscript(chars)?); s = chars; } } - let Ok(Token::Name(name)) = next_token(chars) else { + let Token::Name(name) = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected name in variable definition."); }; @@ -192,15 +190,15 @@ fn parse_var_type_definition( /// parse the subscript of an array variable, e.g. 
[4] [6] [0] /// must be compile-time constant fn parse_subscript(chars: &mut &[char]) -> Result { - let Ok(Token::LeftSquareBracket) = next_token(chars) else { + let Token::LeftSquareBracket = next_token(chars)? else { // TODO: add program snippet r_panic!("Expected `[` in array subscript."); }; - let Ok(Token::Digits(digits)) = next_token(chars) else { + let Token::Digits(digits) = next_token(chars)? else { // TODO: add program snippet r_panic!("Expected natural number in array subscript."); }; - let Ok(Token::RightSquareBracket) = next_token(chars) else { + let Token::RightSquareBracket = next_token(chars)? else { // TODO: add program snippet r_panic!("Expected `]` in array subscript."); }; @@ -211,7 +209,7 @@ fn parse_subscript(chars: &mut &[char]) -> Result { pub fn parse_var_target(chars: &mut &[char]) -> Result { let is_spread = { let mut s = *chars; - if let Ok(Token::Asterisk) = next_token(&mut s) { + if let Token::Asterisk = next_token(&mut s)? { *chars = s; true } else { @@ -219,7 +217,7 @@ pub fn parse_var_target(chars: &mut &[char]) -> Result { } }; - let Ok(Token::Name(base_var_name)) = next_token(chars) else { + let Token::Name(base_var_name) = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected identifier in variable target identifier."); }; @@ -227,20 +225,20 @@ pub fn parse_var_target(chars: &mut &[char]) -> Result { let mut ref_chain = vec![]; let mut s = *chars; loop { - match next_token(&mut s) { - Ok(Token::LeftSquareBracket) => { + match next_token(&mut s)? { + Token::LeftSquareBracket => { let index = parse_subscript(chars)?; ref_chain.push(Reference::Index(index)); } - Ok(Token::Dot) => { - let Ok(Token::Name(subfield_name)) = next_token(&mut s) else { + Token::Dot => { + let Token::Name(subfield_name) = next_token(&mut s)? 
else { // TODO: add source snippet r_panic!("Expected subfield name in variable target identifier."); }; ref_chain.push(Reference::NamedField(subfield_name)); } // TODO: add source snippet - Err(_) => r_panic!("Unexpected token found in variable target."), + _ => r_panic!("Unexpected token found in variable target."), _ => { break; } @@ -260,13 +258,13 @@ pub fn parse_var_target(chars: &mut &[char]) -> Result { } fn parse_integer(chars: &mut &[char]) -> Result { - let mut token = next_token(chars); + let mut token = next_token(chars)?; let mut is_negative = false; - if let Ok(Token::Minus) = token { + if let Token::Minus = token { is_negative = true; - token = next_token(chars); + token = next_token(chars)?; } - let Ok(Token::Digits(digits)) = token else { + let Token::Digits(digits) = token else { // TODO: add source snippet r_panic!("Expected integer.") }; @@ -280,7 +278,7 @@ fn parse_integer(chars: &mut &[char]) -> Result { } fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; LENGTH], String> { - let Ok(Token::LeftParenthesis) = next_token(chars) else { + let Token::LeftParenthesis = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected opening parenthesis in tuple.") }; @@ -290,13 +288,13 @@ fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; *element = parse_integer(chars)?; if j < LENGTH - 1 { - let Ok(Token::Comma) = next_token(chars) else { + let Token::Comma = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected comma in tuple."); }; } } - let Ok(Token::RightParenthesis) = next_token(chars) else { + let Token::RightParenthesis = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected closing parenthesis in tuple."); }; @@ -311,14 +309,14 @@ fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; fn parse_if_else_clause( chars: &mut &[char], ) -> Result, String> { - let Ok(Token::If) = next_token(chars) else { + let Token::If = next_token(chars)? 
else { // TODO: add program snippet r_panic!("Expected \"if\" in if-else clause."); }; let is_not = { let mut s = *chars; - if let Ok(Token::Not) = next_token(&mut s) { + if let Token::Not = next_token(&mut s)? { *chars = s; true } else { @@ -328,7 +326,7 @@ fn parse_if_else_clause( let condition = Expression::parse(chars)?; { let mut s = *chars; - let Ok(Token::LeftBrace) = next_token(&mut s) else { + let Token::LeftBrace = next_token(&mut s)? else { r_panic!("Expected code block in if-else clause."); }; } @@ -336,7 +334,7 @@ fn parse_if_else_clause( let block_two = { let mut s = *chars; - if let Ok(Token::Else) = next_token(&mut s) { + if let Token::Else = next_token(&mut s)? { *chars = s; Some(parse_block(chars)?) } else { @@ -369,7 +367,7 @@ fn parse_if_else_clause( fn parse_while_clause( chars: &mut &[char], ) -> Result, String> { - let Ok(Token::While) = next_token(chars) else { + let Token::While = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected `while` in while clause."); }; @@ -382,7 +380,7 @@ fn parse_while_clause( { let mut s = *chars; - let Ok(Token::LeftBrace) = next_token(&mut s) else { + let Token::LeftBrace = next_token(&mut s)? else { r_panic!("Expected code block in while clause."); }; } @@ -397,17 +395,17 @@ fn parse_while_clause( fn parse_function_definition_clause( chars: &mut &[char], ) -> Result, String> { - let Ok(Token::Fn) = next_token(chars) else { + let Token::Fn = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected `fn` in function definition clause."); }; - let Ok(Token::Name(function_name)) = next_token(chars) else { + let Token::Name(function_name) = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected name in function definition clause."); }; - let Ok(Token::LeftParenthesis) = next_token(chars) else { + let Token::LeftParenthesis = next_token(chars)? 
else { // TODO: add source snippet r_panic!("Expected argument list in function definition clause."); }; @@ -415,16 +413,16 @@ fn parse_function_definition_clause( loop { { let mut s = *chars; - if let Ok(Token::RightParenthesis) = next_token(&mut s) { + if let Token::RightParenthesis = next_token(&mut s)? { *chars = s; break; } } arguments.push(parse_var_type_definition(chars)?); - match next_token(chars) { - Ok(Token::RightParenthesis) => break, - Ok(Token::Comma) => (), + match next_token(chars)? { + Token::RightParenthesis => break, + Token::Comma => (), // TODO: add source snippet _ => r_panic!("Unexpected token in function argument list."), } @@ -441,17 +439,17 @@ fn parse_function_definition_clause( fn parse_struct_definition_clause( chars: &mut &[char], ) -> Result, String> { - let Ok(Token::Struct) = next_token(chars) else { + let Token::Struct = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected `struct` in struct definition."); }; - let Ok(Token::Name(name)) = next_token(chars) else { + let Token::Name(name) = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected name in struct definition."); }; - let Ok(Token::LeftBrace) = next_token(chars) else { + let Token::LeftBrace = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected `{{` in struct clause."); }; @@ -460,11 +458,11 @@ fn parse_struct_definition_clause( loop { let field = parse_var_type_definition::(chars)?; fields.push(field.try_into()?); - let Ok(Token::Semicolon) = next_token(chars) else { + let Token::Semicolon = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected semicolon after struct definition field."); }; - if let Ok(Token::RightBrace) = next_token(chars) { + if let Token::RightBrace = next_token(chars)? 
{ break; } } @@ -478,16 +476,15 @@ fn parse_let_clause(chars: &mut &[char]) -> Result Result { +pub fn next_token(chars: &mut &[char]) -> Result { // skip any whitespace skip_whitespace(chars); @@ -57,52 +59,87 @@ pub fn next_token(chars: &mut &[char]) -> Result { return Ok(Token::None); }; Ok(match *c { - ';' => { - *chars = &chars[1..]; - Token::Semicolon - } - '{' => { - *chars = &chars[1..]; - Token::LeftBrace - } - '}' => { - *chars = &chars[1..]; - Token::RightBrace - } - '(' => { - *chars = &chars[1..]; - Token::LeftParenthesis - } - ')' => { - *chars = &chars[1..]; - Token::RightParenthesis - } - '[' => { - *chars = &chars[1..]; - Token::LeftSquareBracket - } - ']' => { + c @ (';' | '{' | '}' | '(' | ')' | '[' | ']' | '.' | ',' | '*' | '@' | '+' | '-') => { *chars = &chars[1..]; - Token::RightSquareBracket + match c { + ';' => Token::Semicolon, + '{' => Token::LeftBrace, + '}' => Token::RightBrace, + '(' => Token::LeftParenthesis, + ')' => Token::RightParenthesis, + '[' => Token::LeftSquareBracket, + ']' => Token::RightSquareBracket, + '.' => Token::Dot, + ',' => Token::Comma, + '*' => Token::Asterisk, + '@' => Token::At, + '+' => match chars.get(1) { + Some('+') => { + *chars = &chars[1..]; + Token::PlusPlus + } + Some('=') => { + *chars = &chars[1..]; + Token::PlusEquals + } + _ => Token::Plus, + }, + '-' => match chars.get(0) { + Some('-') => { + *chars = &chars[1..]; + Token::MinusMinus + } + Some('=') => { + *chars = &chars[1..]; + Token::MinusEquals + } + _ => Token::Minus, + }, + _ => unreachable!(), + } } - '.' 
=> { - *chars = &chars[1..]; - Token::Dot + '"' => Token::Character(parse_character_literal(chars)?), + '\'' => { + todo!(); } - ',' => { - *chars = &chars[1..]; - Token::Comma + '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => todo!(), + 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' + | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' + | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' + | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_' => { + todo!(); } - '*' => { - *chars = &chars[1..]; - Token::Asterisk + _ => r_panic!("Invalid token found: `{c}`."), + }) +} + +/// handle character escape sequences +// supports Rust ASCII escapes +fn parse_character_literal(chars: &mut &[char]) -> Result { + match chars.get(1) { + Some('\\') => { + let c = match chars.get(2) { + Some(c) => match c { + '\'' => '\'', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + '\\' => '\\', + '0' => '\0', + // TODO: add source snippet + _ => r_panic!("Invalid escape sequence in character literal."), + }, + None => r_panic!("Expected escape sequence in character literal."), + }; + *chars = &chars[4..]; + return Ok(c); } - '@' => { - *chars = &chars[1..]; - Token::At + Some(c) => { + *chars = &chars[3..]; + return Ok(*c); } - _ => todo!(), - }) + None => r_panic!("Character literal must be length 1."), + }; } // TODO: fix this, make this based on token, currently it has no nuance for strings for example From 59852ad25de428eec2d740e72ddbb49e2ff250a5 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 11:29:54 +1100 Subject: [PATCH 33/56] Add tokeniser tests for numbers --- compiler/src/parser/old_tokeniser.rs | 25 --- compiler/src/parser/tokeniser.rs | 256 +++++++++++++++++++++------ 2 files changed, 199 insertions(+), 82 deletions(-) diff --git a/compiler/src/parser/old_tokeniser.rs b/compiler/src/parser/old_tokeniser.rs index 29e2208..517432e 100644 
--- a/compiler/src/parser/old_tokeniser.rs +++ b/compiler/src/parser/old_tokeniser.rs @@ -142,28 +142,3 @@ fn strip_line(line: &str) -> String { .collect::>() .join(" ") } - -/// handle string escape sequences -// supports Rust ASCII escapes -fn tokenise_raw_string_literal(raw: &str) -> Result { - let mut s_iter = raw.chars(); - let mut built_string = String::with_capacity(raw.len()); - while let Some(raw_char) = s_iter.next() { - built_string.push(match raw_char { - '\\' => match s_iter.next() { - Some(c) => match c { - '\"' => '"', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '0' => '\0', - _ => r_panic!("Invalid escape sequence in string literal: {raw}"), - }, - None => r_panic!("Expected escape sequence in string literal: {raw}"), - }, - c => c, - }); - } - Ok(built_string) -} diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index 7d425d3..a8b05a8 100644 --- a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -36,7 +36,7 @@ pub enum Token { Asterisk, At, Name(String), - Digits(String), + NaturalNumber(usize), String(String), Character(char), Plus, @@ -52,7 +52,17 @@ pub enum Token { /// Get the next token from chars, advance the passed in pointer pub fn next_token(chars: &mut &[char]) -> Result { // skip any whitespace - skip_whitespace(chars); + loop { + match chars.get(0) { + Some(c) => { + if !c.is_whitespace() { + break; + } + } + None => break, + } + *chars = &chars[1..]; + } // read the first character and branch from there let Some(c) = chars.get(0) else { @@ -98,23 +108,28 @@ pub fn next_token(chars: &mut &[char]) -> Result { _ => unreachable!(), } } - '"' => Token::Character(parse_character_literal(chars)?), - '\'' => { - todo!(); + '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => { + Token::NaturalNumber(parse_number(chars)?) 
} - '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => todo!(), 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' - | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_' => { - todo!(); - } + | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_' => {} + '\'' => Token::Character(parse_character_literal(chars)?), + '"' => Token::String(parse_string_literal(chars)?), _ => r_panic!("Invalid token found: `{c}`."), }) } -/// handle character escape sequences -// supports Rust ASCII escapes +fn parse_number(chars: &mut &[char]) -> Result { + todo!() +} + +fn parse_word(chars: &mut &[char]) -> Result { + todo!() +} + +/// handle character escape sequences, supports Rust ASCII escapes fn parse_character_literal(chars: &mut &[char]) -> Result { match chars.get(1) { Some('\\') => { @@ -142,58 +157,39 @@ fn parse_character_literal(chars: &mut &[char]) -> Result { }; } -// TODO: fix this, make this based on token, currently it has no nuance for strings for example -// TODO: figure out errors for these helper functions -pub fn find_next(chars: &[char], character: char) -> Result { +/// handle string escape sequences, supports Rust ASCII escapes +fn parse_string_literal(chars: &mut &[char]) -> Result { + let mut built_string = String::new(); let mut i = 0; loop { - let Some(c) = chars.get(i) else { - return Err(()); - }; - - if *c == character { - break; - } - i += 1; - } - Ok(i) -} - -// TODO: fix this, make this based on token, currently it has no nuance for strings for example -pub fn find_and_advance<'a>(chars: &'a mut &[char], character: char) -> Result<&'a [char], ()> { - let substr_len = find_next(chars, character)?; - let chars_before = &chars[..substr_len]; - *chars = &chars[substr_len..]; - Ok(chars_before) -} - -pub fn skip_whitespace(chars: 
&mut &[char]) { - loop { - match chars.get(0) { - Some(c) => { - if !c.is_whitespace() { - break; - } + match chars.get(i) { + None => r_panic!("Unexpected end of input in string literal."), + Some('\\') => { + i += 1; + built_string.push(match chars.get(i) { + Some('\"') => '"', + Some('n') => '\n', + Some('r') => '\r', + Some('t') => '\t', + Some('\\') => '\\', + Some('0') => '\0', + // TODO: add source snippet + _ => r_panic!("Invalid escape sequence in string literal."), + }); + i += 1; } - None => break, + Some('"') => { + i += 1; + break; + } + Some(c) => built_string.push(*c), } - *chars = &chars[1..]; } -} + // panicking assertion: TODO: make sure slices can become 0 length, e.g. chars.len() = 3, i = 3? + assert!(i <= chars.len()); + *chars = &chars[i..]; -pub fn find_next_whitespace(chars: &[char]) -> Result { - let mut i = 0; - loop { - let Some(c) = chars.get(i) else { - return Err(()); - }; - - if c.is_whitespace() { - break; - } - i += 1; - } - Ok(i) + Ok(built_string) } #[cfg(test)] @@ -681,4 +677,150 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, ], ); } + + #[test] + fn numbers_dec_1() { + _tokenisation_test( + "1 123 000098763", + &[ + Token::String(String::from("\"")), + Token::String(String::from(" ")), + ], + ); + } + + #[test] + fn numbers_dec_2() { + _tokenisation_test( + ".0654 567.32", + &[ + Token::Dot, + Token::NaturalNumber(654), + Token::NaturalNumber(567), + Token::Dot, + Token::NaturalNumber(32), + ], + ); + } + + #[test] + #[ignore] + fn numbers_hex_1() { + _tokenisation_test( + "0x56 0x00 0x00ff1 0x4ff2", + &[ + Token::NaturalNumber(0x56), + Token::NaturalNumber(0x00), + Token::NaturalNumber(0xff1), + Token::NaturalNumber(0x4ff2), + ], + ); + } + + #[test] + #[ignore] + fn numbers_hex_1a() { + _tokenisation_test( + "0x 56 0x00 0x00f f1 0 x4ff2", + &[ + Token::NaturalNumber(0), + Token::Name(String::from("x")), + Token::NaturalNumber(56), + Token::NaturalNumber(0x00), + Token::NaturalNumber(0x00f), + 
Token::Name(String::from("f1")), + Token::NaturalNumber(0), + Token::Name(String::from("x4ff2")), + ], + ); + } + + #[test] + #[ignore] + fn numbers_hex_2() { + _tokenisation_test( + "0x56 0x00 0x00ff1 0x4ff2", + &[ + Token::NaturalNumber(0x56), + Token::NaturalNumber(0x00), + Token::NaturalNumber(0xff1), + Token::NaturalNumber(0x4ff2), + ], + ); + } + + #[test] + #[ignore] + fn numbers_hex_2a() { + _tokenisation_test( + "0x 56 0x00 0x00f f1 0 x4ff2", + &[ + Token::NaturalNumber(0), + Token::Name(String::from("x")), + Token::NaturalNumber(56), + Token::NaturalNumber(0x00), + Token::NaturalNumber(0x00f), + Token::Name(String::from("f1")), + Token::NaturalNumber(0), + Token::Name(String::from("x4ff2")), + ], + ); + } + + #[test] + #[ignore] + fn numbers_bin_1() { + _tokenisation_test( + "0b1111 0b000 0b0 0b1 0b1010100 0b001101", + &[ + Token::NaturalNumber(0b1111), + Token::NaturalNumber(0b000), + Token::NaturalNumber(0b0), + Token::NaturalNumber(0b1), + Token::NaturalNumber(0b1010100), + Token::NaturalNumber(0b001101), + ], + ); + } + + #[test] + #[ignore] + fn numbers_bin_1a() { + _tokenisation_test( + "0b1 111 0 b000 0b 0 0b1 0b101 0100 0b001101", + &[ + Token::NaturalNumber(0b1), + Token::NaturalNumber(111), + Token::NaturalNumber(0), + Token::Name(String::from("b000")), + Token::NaturalNumber(0), + Token::Name(String::from("b")), + Token::NaturalNumber(0), + Token::NaturalNumber(0b1), + Token::NaturalNumber(0b101), + Token::NaturalNumber(100), + Token::NaturalNumber(0b1101), + ], + ); + } + + #[test] + #[ignore] + fn numbers_hex_bin_1() { + _tokenisation_test( + "0x11001 0b11001", + &[Token::NaturalNumber(0x11001), Token::NaturalNumber(0b11001)], + ); + } + + #[test] + #[ignore] + fn numbers_hex_bin_2() { + for s in [ + "0b00102", "0b013000", "0b010040", "0b050000", "0b66000", "0b017", "0b8", "0b90", + "0b01a0", "0b4b", "0b01c0", "0b0d", "0b01e0", "0b01f", + ] { + assert_eq!(tokenise(s).unwrap_err(), ""); + } + } } From 0eb3055c9e7d3e27f62630455affe85bbd7c8e2c 
Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 12:30:18 +1100 Subject: [PATCH 34/56] Implement number tokeniser --- compiler/src/parser/expressions.rs | 4 +- compiler/src/parser/parser.rs | 17 +-- compiler/src/parser/tokeniser.rs | 227 +++++++++++++++++++++-------- 3 files changed, 173 insertions(+), 75 deletions(-) diff --git a/compiler/src/parser/expressions.rs b/compiler/src/parser/expressions.rs index 1a5b7b1..078de0b 100644 --- a/compiler/src/parser/expressions.rs +++ b/compiler/src/parser/expressions.rs @@ -109,7 +109,7 @@ impl Expression { ( Some(sign), Ok( - token @ (Token::Digits(_) + token @ (Token::Number(_) | Token::Character(_) | Token::True | Token::False), @@ -117,7 +117,7 @@ impl Expression { ) => { *chars = s; let parsed_int = match token { - Token::Digits(digits) => digits.parse::().unwrap(), + Token::Number(number) => number, Token::Character(c) => { let chr_int = c as usize; r_assert!(chr_int < 0xff, "Character tokens must be single-byte: {c}"); diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index fe91b2d..86ccecb 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -97,7 +97,7 @@ impl TapeCellLocation for TapeCell { *chars = s; match next_token(&mut s)? 
{ - Token::Minus | Token::Digits(_) => Ok(LocationSpecifier::Cell(parse_integer(chars)?)), + Token::Minus | Token::Number(_) => Ok(LocationSpecifier::Cell(parse_integer(chars)?)), // variable location specifier: Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), // TODO: add source snippet @@ -127,7 +127,7 @@ impl TapeCellLocation for TapeCell2D { let tuple = parse_integer_tuple::<2>(chars)?; Ok(LocationSpecifier::Cell(TapeCell2D(tuple[0], tuple[1]))) } - Token::Minus | Token::Digits(_) => Ok(LocationSpecifier::Cell(TapeCell2D( + Token::Minus | Token::Number(_) => Ok(LocationSpecifier::Cell(TapeCell2D( parse_integer(chars)?, 0, ))), @@ -194,7 +194,7 @@ fn parse_subscript(chars: &mut &[char]) -> Result { // TODO: add program snippet r_panic!("Expected `[` in array subscript."); }; - let Token::Digits(digits) = next_token(chars)? else { + let Token::Number(number) = next_token(chars)? else { // TODO: add program snippet r_panic!("Expected natural number in array subscript."); }; @@ -203,7 +203,7 @@ fn parse_subscript(chars: &mut &[char]) -> Result { r_panic!("Expected `]` in array subscript."); }; // TODO: handle errors here - Ok(digits.parse::().unwrap()) + Ok(number) } pub fn parse_var_target(chars: &mut &[char]) -> Result { @@ -237,11 +237,7 @@ pub fn parse_var_target(chars: &mut &[char]) -> Result { }; ref_chain.push(Reference::NamedField(subfield_name)); } - // TODO: add source snippet - _ => r_panic!("Unexpected token found in variable target."), - _ => { - break; - } + _ => break, } *chars = s; } @@ -264,12 +260,11 @@ fn parse_integer(chars: &mut &[char]) -> Result { is_negative = true; token = next_token(chars)?; } - let Token::Digits(digits) = token else { + let Token::Number(magnitude) = token else { // TODO: add source snippet r_panic!("Expected integer.") }; // TODO: handle errors here - let magnitude = digits.parse::().unwrap(); Ok(match is_negative { // TODO: truncation error handling false => magnitude as i32, diff --git 
a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index a8b05a8..a21faa5 100644 --- a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -35,18 +35,18 @@ pub enum Token { Dot, Asterisk, At, - Name(String), - NaturalNumber(usize), - String(String), - Character(char), Plus, Minus, + EqualsSign, + Semicolon, PlusPlus, MinusMinus, PlusEquals, MinusEquals, - EqualsSign, - Semicolon, + Name(String), + Number(usize), + String(String), + Character(char), } /// Get the next token from chars, advance the passed in pointer @@ -83,7 +83,7 @@ pub fn next_token(chars: &mut &[char]) -> Result { ',' => Token::Comma, '*' => Token::Asterisk, '@' => Token::At, - '+' => match chars.get(1) { + '+' => match chars.get(0) { Some('+') => { *chars = &chars[1..]; Token::PlusPlus @@ -109,12 +109,36 @@ pub fn next_token(chars: &mut &[char]) -> Result { } } '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => { - Token::NaturalNumber(parse_number(chars)?) + Token::Number(parse_number(chars)?) 
} 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' - | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_' => {} + | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_' => { + let word = parse_word(chars)?; + match word.as_str() { + "output" => Token::Output, + "input" => Token::Input, + "fn" => Token::Fn, + "cell" => Token::Cell, + "struct" => Token::Struct, + "while" => Token::While, + "if" => Token::If, + "not" => Token::Not, + "else" => Token::Else, + "copy" => Token::Copy, + "drain" => Token::Drain, + "into" => Token::Into, + "bf" => Token::Bf, + "clobbers" => Token::Clobbers, + "assert" => Token::Assert, + "equals" => Token::Equals, + "unknown" => Token::Unknown, + "true" => Token::True, + "false" => Token::False, + _ => Token::Name(word), + } + } '\'' => Token::Character(parse_character_literal(chars)?), '"' => Token::String(parse_string_literal(chars)?), _ => r_panic!("Invalid token found: `{c}`."), @@ -122,7 +146,50 @@ pub fn next_token(chars: &mut &[char]) -> Result { } fn parse_number(chars: &mut &[char]) -> Result { - todo!() + // parse hexadecimal and binary + // if let Some('0') = chars.get(0) { + // match chars.get(1) { + // // Some('x') => { + // // let mut i = 2; + // // } + // // Some('b') => { + // // let mut i = 2; + // // } + // _ => (), + // } + // } + + // parse decimal natural number + let mut i = 0; + let mut n = 0; + loop { + let Some(digit) = chars.get(i) else { + break; + }; + match digit { + c @ ('0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9') => { + n *= 10; + n += (*c as usize) - ('0' as usize); + } + // '_' => { + // // TODO: support underscores in number literals? + // } + ';' | '{' | '}' | '(' | ')' | '[' | ']' | '.' 
| ',' | '*' | '@' | '+' | '-' => break, + c if c.is_whitespace() => break, + 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' + | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' + | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' + | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_' => { + // TODO: add source snippet + r_panic!("Unexpected word character in number token.") + } + // TODO: add source snippet + _ => r_panic!("Unknown character found while parsing number token."), + } + i += 1; + } + + Ok(n) } fn parse_word(chars: &mut &[char]) -> Result { @@ -131,9 +198,15 @@ fn parse_word(chars: &mut &[char]) -> Result { /// handle character escape sequences, supports Rust ASCII escapes fn parse_character_literal(chars: &mut &[char]) -> Result { - match chars.get(1) { + let mut i = 0; + let Some('\'') = chars.get(i) else { + r_panic!("Expected `'` at start of character literal."); + }; + i += 1; + let c = match chars.get(i) { Some('\\') => { - let c = match chars.get(2) { + i += 1; + let c = match chars.get(i) { Some(c) => match c { '\'' => '\'', 'n' => '\n', @@ -146,21 +219,32 @@ fn parse_character_literal(chars: &mut &[char]) -> Result { }, None => r_panic!("Expected escape sequence in character literal."), }; - *chars = &chars[4..]; - return Ok(c); - } - Some(c) => { - *chars = &chars[3..]; - return Ok(*c); + c } + Some(c) => *c, None => r_panic!("Character literal must be length 1."), }; + i += 1; + let Some('\'') = chars.get(i) else { + r_panic!("Expected `'` at end of character literal."); + }; + i += 1; + + // update used characters + assert!(i <= chars.len()); + *chars = &chars[i..]; + + Ok(c) } /// handle string escape sequences, supports Rust ASCII escapes fn parse_string_literal(chars: &mut &[char]) -> Result { let mut built_string = String::new(); let mut i = 0; + let Some('"') = chars.get(i) else { + r_panic!("Expected `\"` at start of 
string literal."); + }; + i += 1; loop { match chars.get(i) { None => r_panic!("Unexpected end of input in string literal."), @@ -176,16 +260,18 @@ fn parse_string_literal(chars: &mut &[char]) -> Result { // TODO: add source snippet _ => r_panic!("Invalid escape sequence in string literal."), }); - i += 1; - } - Some('"') => { - i += 1; - break; } + Some('"') => break, Some(c) => built_string.push(*c), } } - // panicking assertion: TODO: make sure slices can become 0 length, e.g. chars.len() = 3, i = 3? + i += 1; + let Some('"') = chars.get(i) else { + r_panic!("Expected `\"` at end of string literal."); + }; + i += 1; + + // update used characters assert!(i <= chars.len()); *chars = &chars[i..]; @@ -695,10 +781,10 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, ".0654 567.32", &[ Token::Dot, - Token::NaturalNumber(654), - Token::NaturalNumber(567), + Token::Number(654), + Token::Number(567), Token::Dot, - Token::NaturalNumber(32), + Token::Number(32), ], ); } @@ -709,10 +795,10 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, _tokenisation_test( "0x56 0x00 0x00ff1 0x4ff2", &[ - Token::NaturalNumber(0x56), - Token::NaturalNumber(0x00), - Token::NaturalNumber(0xff1), - Token::NaturalNumber(0x4ff2), + Token::Number(0x56), + Token::Number(0x00), + Token::Number(0xff1), + Token::Number(0x4ff2), ], ); } @@ -723,13 +809,13 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, _tokenisation_test( "0x 56 0x00 0x00f f1 0 x4ff2", &[ - Token::NaturalNumber(0), + Token::Number(0), Token::Name(String::from("x")), - Token::NaturalNumber(56), - Token::NaturalNumber(0x00), - Token::NaturalNumber(0x00f), + Token::Number(56), + Token::Number(0x00), + Token::Number(0x00f), Token::Name(String::from("f1")), - Token::NaturalNumber(0), + Token::Number(0), Token::Name(String::from("x4ff2")), ], ); @@ -741,10 +827,10 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, _tokenisation_test( "0x56 0x00 0x00ff1 0x4ff2", &[ 
- Token::NaturalNumber(0x56), - Token::NaturalNumber(0x00), - Token::NaturalNumber(0xff1), - Token::NaturalNumber(0x4ff2), + Token::Number(0x56), + Token::Number(0x00), + Token::Number(0xff1), + Token::Number(0x4ff2), ], ); } @@ -755,13 +841,13 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, _tokenisation_test( "0x 56 0x00 0x00f f1 0 x4ff2", &[ - Token::NaturalNumber(0), + Token::Number(0), Token::Name(String::from("x")), - Token::NaturalNumber(56), - Token::NaturalNumber(0x00), - Token::NaturalNumber(0x00f), + Token::Number(56), + Token::Number(0x00), + Token::Number(0x00f), Token::Name(String::from("f1")), - Token::NaturalNumber(0), + Token::Number(0), Token::Name(String::from("x4ff2")), ], ); @@ -773,12 +859,12 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, _tokenisation_test( "0b1111 0b000 0b0 0b1 0b1010100 0b001101", &[ - Token::NaturalNumber(0b1111), - Token::NaturalNumber(0b000), - Token::NaturalNumber(0b0), - Token::NaturalNumber(0b1), - Token::NaturalNumber(0b1010100), - Token::NaturalNumber(0b001101), + Token::Number(0b1111), + Token::Number(0b000), + Token::Number(0b0), + Token::Number(0b1), + Token::Number(0b1010100), + Token::Number(0b001101), ], ); } @@ -787,19 +873,19 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, #[ignore] fn numbers_bin_1a() { _tokenisation_test( - "0b1 111 0 b000 0b 0 0b1 0b101 0100 0b001101", + "0b1 111 0 b000 0 b 0 0b1 0b101 0100 0b001101", &[ - Token::NaturalNumber(0b1), - Token::NaturalNumber(111), - Token::NaturalNumber(0), + Token::Number(0b1), + Token::Number(111), + Token::Number(0), Token::Name(String::from("b000")), - Token::NaturalNumber(0), + Token::Number(0), Token::Name(String::from("b")), - Token::NaturalNumber(0), - Token::NaturalNumber(0b1), - Token::NaturalNumber(0b101), - Token::NaturalNumber(100), - Token::NaturalNumber(0b1101), + Token::Number(0), + Token::Number(0b1), + Token::Number(0b101), + Token::Number(100), + Token::Number(0b1101), ], ); } 
@@ -809,7 +895,7 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, fn numbers_hex_bin_1() { _tokenisation_test( "0x11001 0b11001", - &[Token::NaturalNumber(0x11001), Token::NaturalNumber(0b11001)], + &[Token::Number(0x11001), Token::Number(0b11001)], ); } @@ -823,4 +909,21 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, assert_eq!(tokenise(s).unwrap_err(), ""); } } + + #[test] + fn numbers_and_words_dec() { + assert_eq!(tokenise("456hello").unwrap_err(), ""); + } + + #[test] + #[ignore] + fn numbers_and_words_hex() { + assert_eq!(tokenise("0x00free me").unwrap_err(), ""); + } + + #[test] + #[ignore] + fn numbers_and_words_bin() { + assert_eq!(tokenise("0b00ebrave").unwrap_err(), ""); + } } From dc869866dee221a2875884a3649530c9dbb429e8 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 13:06:38 +1100 Subject: [PATCH 35/56] Fix infinite loop in number parser --- compiler/src/parser/tokeniser.rs | 43 +++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index a21faa5..ccbad41 100644 --- a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -189,6 +189,10 @@ fn parse_number(chars: &mut &[char]) -> Result { i += 1; } + // update used characters + assert!(i <= chars.len()); + *chars = &chars[i..]; + Ok(n) } @@ -221,12 +225,13 @@ fn parse_character_literal(chars: &mut &[char]) -> Result { }; c } + Some('\'') => r_panic!("Unexpected `'` in character literal, must be length 1."), Some(c) => *c, - None => r_panic!("Character literal must be length 1."), + None => r_panic!("Unexpected end of input found while parsing character literal."), }; i += 1; let Some('\'') = chars.get(i) else { - r_panic!("Expected `'` at end of character literal."); + r_panic!("Expected `'` at end of character literal. 
Character literals must be length 1."); }; i += 1; @@ -264,8 +269,9 @@ fn parse_string_literal(chars: &mut &[char]) -> Result { Some('"') => break, Some(c) => built_string.push(*c), } + i += 1; } - i += 1; + let Some('"') = chars.get(i) else { r_panic!("Expected `\"` at end of string literal."); }; @@ -280,8 +286,6 @@ fn parse_string_literal(chars: &mut &[char]) -> Result { #[cfg(test)] mod tokeniser_tests { - use crate::macros::macros::r_panic; - use super::*; fn tokenise(input_str: &str) -> Result, String> { @@ -289,9 +293,7 @@ mod tokeniser_tests { let mut chars_slice = &chars_vec[..]; let mut tokens = vec![]; loop { - let Ok(token) = next_token(&mut chars_slice) else { - r_panic!("Invalid token in input."); - }; + let token = next_token(&mut chars_slice)?; if let Token::None = token { break; } @@ -721,15 +723,27 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, } #[test] - #[should_panic] fn character_literals_5() { - _tokenisation_test(r#"'\'"#, &[Token::Character('\\')]); + assert_eq!( + tokenise(r#"'\'"#).unwrap_err(), + "Expected `'` at end of character literal. Character literals must be length 1." + ); } #[test] - #[should_panic] fn character_literals_6() { - _tokenisation_test(r#"'aa'"#, &[Token::String(String::from("aa"))]); + assert_eq!( + tokenise(r#"'aa'"#).unwrap_err(), + "Expected `'` at end of character literal. Character literals must be length 1." + ); + } + + #[test] + fn character_literals_7() { + assert_eq!( + tokenise(r#"''"#).unwrap_err(), + "Unexpected `'` in character literal, must be length 1." 
+ ); } #[test] @@ -769,8 +783,9 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, _tokenisation_test( "1 123 000098763", &[ - Token::String(String::from("\"")), - Token::String(String::from(" ")), + Token::Number(1), + Token::Number(123), + Token::Number(000098763), ], ); } From d9ca92fe8cbf49f301d18808da06f42561e11e1b Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 13:34:56 +1100 Subject: [PATCH 36/56] Fix tokeniser tests, implement word parser --- compiler/src/parser/tokeniser.rs | 78 ++++++++++++++++++++++++-------- 1 file changed, 59 insertions(+), 19 deletions(-) diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index ccbad41..f89c7a8 100644 --- a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -69,7 +69,7 @@ pub fn next_token(chars: &mut &[char]) -> Result { return Ok(Token::None); }; Ok(match *c { - c @ (';' | '{' | '}' | '(' | ')' | '[' | ']' | '.' | ',' | '*' | '@' | '+' | '-') => { + c @ (';' | '{' | '}' | '(' | ')' | '[' | ']' | '.' 
| ',' | '*' | '@' | '=' | '+' | '-') => { *chars = &chars[1..]; match c { ';' => Token::Semicolon, @@ -83,6 +83,7 @@ pub fn next_token(chars: &mut &[char]) -> Result { ',' => Token::Comma, '*' => Token::Asterisk, '@' => Token::At, + '=' => Token::EqualsSign, '+' => match chars.get(0) { Some('+') => { *chars = &chars[1..]; @@ -197,7 +198,40 @@ fn parse_number(chars: &mut &[char]) -> Result { } fn parse_word(chars: &mut &[char]) -> Result { - todo!() + let mut i = 0; + let mut parsed_word = String::new(); + + { + let Some( + c @ ('a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' + | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' + | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' + | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_'), + ) = chars.get(i) + else { + r_panic!("Expected non-numeral character at start of word."); + }; + parsed_word.push(*c); + i += 1; + } + + while let Some( + c @ ('a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' + | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' + | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' + | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '_' | '0' | '1' | '2' + | '3' | '4' | '5' | '6' | '7' | '8' | '9'), + ) = chars.get(i) + { + parsed_word.push(*c); + i += 1; + } + + // update used characters + assert!(i <= chars.len()); + *chars = &chars[i..]; + + Ok(parsed_word) } /// handle character escape sequences, supports Rust ASCII escapes @@ -244,7 +278,7 @@ fn parse_character_literal(chars: &mut &[char]) -> Result { /// handle string escape sequences, supports Rust ASCII escapes fn parse_string_literal(chars: &mut &[char]) -> Result { - let mut built_string = String::new(); + let mut parsed_string = String::new(); let mut i = 0; let Some('"') = chars.get(i) else { r_panic!("Expected `\"` at 
start of string literal."); @@ -255,7 +289,7 @@ fn parse_string_literal(chars: &mut &[char]) -> Result { None => r_panic!("Unexpected end of input in string literal."), Some('\\') => { i += 1; - built_string.push(match chars.get(i) { + parsed_string.push(match chars.get(i) { Some('\"') => '"', Some('n') => '\n', Some('r') => '\r', @@ -267,7 +301,7 @@ fn parse_string_literal(chars: &mut &[char]) -> Result { }); } Some('"') => break, - Some(c) => built_string.push(*c), + Some(c) => parsed_string.push(*c), } i += 1; } @@ -281,7 +315,7 @@ fn parse_string_literal(chars: &mut &[char]) -> Result { assert!(i <= chars.len()); *chars = &chars[i..]; - Ok(built_string) + Ok(parsed_string) } #[cfg(test)] @@ -441,8 +475,8 @@ mod tokeniser_tests { Token::Plus, Token::PlusEquals, Token::Plus, - Token::Plus, - Token::PlusEquals, + Token::PlusPlus, + Token::EqualsSign, Token::Plus, Token::MinusEquals, Token::MinusMinus, @@ -480,8 +514,7 @@ mod tokeniser_tests { Token::Asterisk, Token::MinusMinus, Token::MinusEquals, - Token::Plus, - Token::Plus, + Token::PlusPlus, Token::Asterisk, Token::At, Token::At, @@ -489,7 +522,8 @@ mod tokeniser_tests { Token::LeftSquareBracket, Token::LeftBrace, Token::LeftBrace, - Token::PlusPlus, + Token::Plus, + Token::Plus, Token::LeftParenthesis, Token::LeftParenthesis, Token::RightSquareBracket, @@ -585,8 +619,8 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, Token::Minus, Token::Plus, Token::Input, - Token::Plus, Token::PlusPlus, + Token::Plus, Token::Not, Token::LeftParenthesis, Token::Else, @@ -640,10 +674,7 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, #[test] fn names_2a() { - _tokenisation_test( - "while_", - &[Token::While, Token::Name(String::from("while_"))], - ); + _tokenisation_test("while_", &[Token::Name(String::from("while_"))]); } #[test] @@ -927,18 +958,27 @@ if fn{output)true)false -while* @copy@+=@drain-=into=][bf.cell"#, #[test] fn numbers_and_words_dec() { - 
assert_eq!(tokenise("456hello").unwrap_err(), ""); + assert_eq!( + tokenise("456hello").unwrap_err(), + "Unexpected word character in number token." + ); } #[test] #[ignore] fn numbers_and_words_hex() { - assert_eq!(tokenise("0x00free me").unwrap_err(), ""); + assert_eq!( + tokenise("0x00free me").unwrap_err(), + "Unexpected word character in number token." + ); } #[test] #[ignore] fn numbers_and_words_bin() { - assert_eq!(tokenise("0b00ebrave").unwrap_err(), ""); + assert_eq!( + tokenise("0b00ebrave").unwrap_err(), + "Unexpected word character in number token." + ); } } From 948e9a66445d39a822c0c0ed3c5149f91e0fbdf5 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 13:59:36 +1100 Subject: [PATCH 37/56] Fix infinite loop in expression array parser --- compiler/src/parser/expressions.rs | 8 +++----- compiler/src/parser/parser.rs | 6 +++++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/compiler/src/parser/expressions.rs b/compiler/src/parser/expressions.rs index 078de0b..9776f00 100644 --- a/compiler/src/parser/expressions.rs +++ b/compiler/src/parser/expressions.rs @@ -59,11 +59,9 @@ impl Expression { let mut expressions = vec![]; loop { expressions.push(Self::parse(chars)?); - match next_token(chars) { - Ok(Token::RightSquareBracket) => break, - Ok(Token::Comma) => { - *chars = s; - } + match next_token(chars)? { + Token::RightSquareBracket => break, + Token::Comma => (), _ => r_panic!("Unexpected token in array literal."), } } diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 86ccecb..096d02f 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -53,7 +53,11 @@ fn parse_clause( } } Token::Cell => parse_let_clause(chars)?, - _ => r_panic!("Invalid starting token."), + Token::Name(_) => match next_token(&mut s)? 
{ + Token::LeftParenthesis => todo!(), + _ => todo!(), + }, + token => r_panic!("Invalid starting token: {token:?}"), }) } From 2a74e5ab76c46e61a876d41f1d2d74be14f806c3 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 15:24:16 +1100 Subject: [PATCH 38/56] Refactor parser tests and fix expressions bugs --- compiler/src/frontend.rs | 4 +- compiler/src/parser/expressions.rs | 107 ++-- compiler/src/parser/parser.rs | 30 +- compiler/src/parser/tests.rs | 802 +++++++++++++++-------------- compiler/src/parser/tokeniser.rs | 182 +++++-- compiler/src/parser/types.rs | 4 +- 6 files changed, 643 insertions(+), 486 deletions(-) diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index 3799136..309f944 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -227,7 +227,7 @@ impl MastermindContext { } } } - Clause::InputVariable { var } => match var.is_spread { + Clause::Input { var } => match var.is_spread { false => { let cell = scope.get_cell(&var)?; scope.push_instruction(Instruction::InputToCell(cell)); @@ -239,7 +239,7 @@ impl MastermindContext { } } }, - Clause::OutputValue { value } => { + Clause::Output { value } => { match value { Expression::VariableReference(var) => match var.is_spread { false => { diff --git a/compiler/src/parser/expressions.rs b/compiler/src/parser/expressions.rs index 9776f00..e0c4100 100644 --- a/compiler/src/parser/expressions.rs +++ b/compiler/src/parser/expressions.rs @@ -41,10 +41,18 @@ impl Expression { // Iterators? // TODO: support post/pre increment in expressions pub fn parse(chars: &mut &[char]) -> Result { + // parse string expressions { let mut s = *chars; - if let Ok(Token::String(literal)) = next_token(&mut s) { - let Ok(Token::None) = next_token(&mut s) else { + if let Token::String(literal) = next_token(&mut s)? { + *chars = s; + let (Token::RightParenthesis + | Token::Semicolon + | Token::Comma + | Token::LeftBrace + | Token::Into + | Token::None) = next_token(&mut s)? 
+ else { // TODO: add source snippet r_panic!("String literal must entirely comprise expression."); }; @@ -52,66 +60,73 @@ impl Expression { } } + // parse array expressions { let mut s = *chars; if let Ok(Token::LeftSquareBracket) = next_token(&mut s) { *chars = s; let mut expressions = vec![]; loop { - expressions.push(Self::parse(chars)?); - match next_token(chars)? { - Token::RightSquareBracket => break, - Token::Comma => (), - _ => r_panic!("Unexpected token in array literal."), + let mut s = *chars; + match (expressions.is_empty(), next_token(&mut s)?) { + (_, Token::RightSquareBracket) => { + *chars = s; + break; + } + (_, Token::Comma) => { + *chars = s; + expressions.push(Self::parse(chars)?); + } + (true, _) => expressions.push(Self::parse(chars)?), + _ => unreachable!(), } } - s = *chars; + // check for delimiters - let Ok( - Token::Semicolon - | Token::Comma - | Token::RightParenthesis - | Token::RightSquareBracket - | Token::None, - ) = next_token(&mut s) - else { - // TODO: add source snippet - r_panic!("Array literal must entirely comprise expression."); - }; + { + let mut s = *chars; + let Ok( + Token::Semicolon + | Token::Comma + | Token::RightParenthesis + | Token::RightSquareBracket + | Token::None, + ) = next_token(&mut s) + else { + // TODO: add source snippet + r_panic!("Array literal must entirely comprise expression."); + }; + } return Ok(Expression::ArrayLiteral(expressions)); } } + // parse arithmetic or variable expressions // this loop is basically a state machine based on the current sign: let mut current_sign = Some(Sign::Positive); // by default the first summand is positive let mut summands = Vec::new(); loop { let mut s = *chars; - match (¤t_sign, next_token(&mut s)) { - (None, Ok(Token::Plus)) => { + match (¤t_sign, next_token(&mut s)?) 
{ + (None, Token::Plus) => { *chars = s; current_sign = Some(Sign::Positive); } - (None, Ok(Token::Minus)) => { + (None, Token::Minus) => { *chars = s; current_sign = Some(Sign::Negative); } - (Some(Sign::Positive), Ok(Token::Minus)) => { + (Some(Sign::Positive), Token::Minus) => { *chars = s; current_sign = Some(Sign::Negative); } - (Some(Sign::Negative), Ok(Token::Minus)) => { + (Some(Sign::Negative), Token::Minus) => { *chars = s; current_sign = Some(Sign::Positive); } ( Some(sign), - Ok( - token @ (Token::Number(_) - | Token::Character(_) - | Token::True - | Token::False), - ), + token @ (Token::Number(_) | Token::Character(_) | Token::True | Token::False), ) => { *chars = s; let parsed_int = match token { @@ -134,7 +149,7 @@ impl Expression { }); current_sign = None; } - (Some(sign), Ok(Token::Name(_) | Token::Asterisk)) => { + (Some(sign), Token::Name(_) | Token::Asterisk) => { let var = parse_var_target(chars)?; summands.push(match sign { Sign::Positive => Expression::VariableReference(var), @@ -145,18 +160,18 @@ impl Expression { }); current_sign = None; } - (Some(sign), Ok(Token::LeftParenthesis)) => { + (Some(sign), Token::LeftParenthesis) => { *chars = s; - let braced_expr = Self::parse(chars)?; + let inner_expr = Self::parse(chars)?; // probably inefficent but everything needs to be flattened at some point anyway so won't matter // TODO: make expression structure more efficient (don't use vectors every time there is a negative) - summands.push(match (sign, braced_expr.clone()) { + summands.push(match (sign, inner_expr.clone()) { ( Sign::Negative, Expression::NaturalNumber(_) | Expression::VariableReference(_), ) => Expression::SumExpression { sign: Sign::Negative, - summands: vec![braced_expr], + summands: vec![inner_expr], }, ( Sign::Negative, @@ -178,26 +193,30 @@ impl Expression { sign: Sign::Negative, summands, }, - _ => braced_expr, + _ => inner_expr, }); + let Token::RightParenthesis = next_token(chars)? 
else { + // TODO: add source snippet + r_panic!("Expected `)` after inner expression."); + }; current_sign = None; } // TODO: add delimiters here: `)` `;` `,` `{` `into` ( sign, - Ok( - Token::RightParenthesis - | Token::Semicolon - | Token::Comma - | Token::LeftBrace - | Token::Into, - ), + Token::RightParenthesis + | Token::RightSquareBracket + | Token::Semicolon + | Token::Comma + | Token::LeftBrace + | Token::Into + | Token::None, ) => { r_assert!(sign.is_none(), "Expected more terms in expression."); break; } // TODO: add source snippet - token => r_panic!("Unexpected token {token:#?} found in expression."), + (_, token) => r_panic!("Unexpected token `{token}` found in expression."), } } diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 096d02f..45f5aa0 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -39,6 +39,8 @@ fn parse_clause( let mut s = *chars; Ok(match next_token(&mut s)? { Token::None => Clause::None, + Token::LeftBrace => Clause::Block(parse_block_clauses(chars)?), + Token::Output => parse_output_clause(chars)?, Token::If => parse_if_else_clause(chars)?, Token::While => parse_while_clause(chars)?, Token::Fn => parse_function_definition_clause(chars)?, @@ -57,11 +59,11 @@ fn parse_clause( Token::LeftParenthesis => todo!(), _ => todo!(), }, - token => r_panic!("Invalid starting token: {token:?}"), + token => r_panic!("Invalid starting token: `{token}`"), }) } -fn parse_block( +fn parse_block_clauses( chars: &mut &[char], ) -> Result>, String> { let Token::LeftBrace = next_token(chars)? else { @@ -73,6 +75,7 @@ fn parse_block( { let mut s = *chars; if let Token::RightBrace = next_token(&mut s)? { + *chars = s; break; } } @@ -329,13 +332,13 @@ fn parse_if_else_clause( r_panic!("Expected code block in if-else clause."); }; } - let block_one = parse_block(chars)?; + let block_one = parse_block_clauses(chars)?; let block_two = { let mut s = *chars; if let Token::Else = next_token(&mut s)? 
{ *chars = s; - Some(parse_block(chars)?) + Some(parse_block_clauses(chars)?) } else { None } @@ -383,7 +386,7 @@ fn parse_while_clause( r_panic!("Expected code block in while clause."); }; } - let loop_block = parse_block(chars)?; + let loop_block = parse_block_clauses(chars)?; Ok(Clause::While { var: condition_variable, @@ -430,7 +433,7 @@ fn parse_function_definition_clause( Ok(Clause::DefineFunction { name: function_name, arguments, - block: parse_block(chars)?, + block: parse_block_clauses(chars)?, }) } @@ -488,3 +491,18 @@ fn parse_let_clause(chars: &mut &[char]) -> Result(chars: &mut &[char]) -> Result, String> { + let Token::Output = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected keyword `output` in output clause."); + }; + + let value = Expression::parse(chars)?; + + let Token::Semicolon = next_token(chars)? else { + r_panic!("Expected semicolon at end of output clause."); + }; + + Ok(Clause::Output { value }) +} diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs index f7f5d43..fc3c826 100644 --- a/compiler/src/parser/tests.rs +++ b/compiler/src/parser/tests.rs @@ -1,3 +1,5 @@ +use crate::parser::parser::parse_program; + #[cfg(test)] mod parser_tests { use super::super::{ @@ -13,50 +15,70 @@ mod parser_tests { bf2d::{Opcode2D, TapeCell2D}, }; + fn _parser_test(raw: &str, expected: &[Clause]) { + assert_eq!(parse_program(raw).unwrap(), expected); + } + + fn _parser_test_2d(raw: &str, expected: &[Clause]) { + assert_eq!(parse_program(raw).unwrap(), expected); + } + #[test] fn parse_if_1() { - assert!(parse_program::("if true {{}}") - .unwrap() - .iter() - .eq(&[Clause::If { + _parser_test( + "if true {{}}", + &[Clause::If { condition: Expression::NaturalNumber(1), if_block: vec![Clause::::Block(vec![])], - }])); + }], + ); } #[test] fn end_tokens_1() { assert_eq!( parse_program::("clobbers").unwrap_err(), - "" + "Invalid starting token: `clobbers`" ); } #[test] fn end_tokens_2() { - 
assert_eq!(parse_program::(";").unwrap_err(), ""); - assert_eq!(parse_program::(";;").unwrap_err(), ""); - assert_eq!(parse_program::(";;;").unwrap_err(), ""); + assert_eq!( + parse_program::(";").unwrap_err(), + "Invalid starting token: `;`" + ); + assert_eq!( + parse_program::(";;").unwrap_err(), + "Invalid starting token: `;`" + ); + assert_eq!( + parse_program::(";;;").unwrap_err(), + "Invalid starting token: `;`" + ); } #[test] fn end_tokens_3() { - assert_eq!(parse_program::("cell;").unwrap_err(), "") + assert_eq!( + parse_program::("cell;").unwrap_err(), + "Expected name in variable definition." + ) } #[test] fn while_condition_1() { - assert!(parse_program::("while x {{}}") - .unwrap() - .iter() - .eq(&[Clause::While { + _parser_test( + "while x {{}}", + &[Clause::While { var: VariableTarget { name: String::from("x"), subfields: None, - is_spread: false + is_spread: false, }, - block: vec![Clause::Block(vec![])] - }])) + block: vec![Clause::Block(vec![])], + }], + ); } #[test] @@ -69,134 +91,127 @@ mod parser_tests { #[test] fn two_dimensional_2() { - assert!(parse_program::("cell x @(0, 1);") - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { + _parser_test_2d( + "cell x @(0, 1);", + &[Clause::DeclareVariable { var: VariableTypeDefinition { name: String::from("x"), var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::Cell(TapeCell2D(0, 1)) - } - }])); + location_specifier: LocationSpecifier::Cell(TapeCell2D(0, 1)), + }, + }], + ); } #[test] fn two_dimensional_3() { - assert!( - parse_program::("cell xyz @(-10, -101);") - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { - var: VariableTypeDefinition { - name: String::from("xyz"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::Cell(TapeCell2D(-10, -101)) - } - }]) + _parser_test_2d( + "cell xyz @(-10, -101);", + &[Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("xyz"), + var_type: 
VariableTypeReference::Cell, + location_specifier: LocationSpecifier::Cell(TapeCell2D(-10, -101)), + }, + }], ); } #[test] fn var_v_1d() { - assert!(parse_program::("cell v;") - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { + _parser_test( + "cell v;", + &[Clause::DeclareVariable { var: VariableTypeDefinition { name: String::from("v"), var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }])) + location_specifier: LocationSpecifier::None, + }, + }], + ); } #[test] fn var_v_2d() { - assert!(parse_program::("cell v;") - .unwrap() - .iter() - .eq(&[Clause::DeclareVariable { + _parser_test_2d( + "cell v;", + &[Clause::DeclareVariable { var: VariableTypeDefinition { name: String::from("v"), var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }])) + location_specifier: LocationSpecifier::None, + }, + }], + ); } #[test] fn inline_bf_1() { - assert!( - parse_program::("cell v; bf {+{cell v;}-}") - .unwrap() - .iter() - .eq(&[ - Clause::DeclareVariable { - var: VariableTypeDefinition { - name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }, - Clause::InlineBrainfuck { + _parser_test( + "cell v; bf {+{cell v;}-}", + &[ + Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, location_specifier: LocationSpecifier::None, - clobbered_variables: vec![], - operations: vec![ - ExtendedOpcode::Opcode(Opcode::Add), - ExtendedOpcode::Block(vec![Clause::DeclareVariable { - var: VariableTypeDefinition { - name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }]), - ExtendedOpcode::Opcode(Opcode::Subtract), - ] - } - ]) - ) + }, + }, + Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode::Add), + 
ExtendedOpcode::Block(vec![Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }, + }]), + ExtendedOpcode::Opcode(Opcode::Subtract), + ], + }, + ], + ); } #[test] fn inline_bf_2() { - assert!( - parse_program::("cell v; bf {v{cell v;}^}") - .unwrap() - .iter() - .eq(&[ - Clause::DeclareVariable { - var: VariableTypeDefinition { - name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }, - Clause::InlineBrainfuck { + _parser_test_2d( + "cell v; bf {v{cell v;}^}", + &[ + Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, location_specifier: LocationSpecifier::None, - clobbered_variables: vec![], - operations: vec![ - ExtendedOpcode::Opcode(Opcode2D::Down), - ExtendedOpcode::Block(vec![Clause::DeclareVariable { - var: VariableTypeDefinition { - name: String::from("v"), - var_type: VariableTypeReference::Cell, - location_specifier: LocationSpecifier::None - } - }]), - ExtendedOpcode::Opcode(Opcode2D::Up), - ] - } - ]) + }, + }, + Clause::InlineBrainfuck { + location_specifier: LocationSpecifier::None, + clobbered_variables: vec![], + operations: vec![ + ExtendedOpcode::Opcode(Opcode2D::Down), + ExtendedOpcode::Block(vec![Clause::DeclareVariable { + var: VariableTypeDefinition { + name: String::from("v"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }, + }]), + ExtendedOpcode::Opcode(Opcode2D::Up), + ], + }, + ], ) } #[test] fn inline_bf_3() { - assert!(parse_program::("bf {vvvv>}") - .unwrap() - .iter() - .eq(&[Clause::InlineBrainfuck { + _parser_test_2d( + "bf {vvvv>}", + &[Clause::InlineBrainfuck { location_specifier: LocationSpecifier::None, clobbered_variables: vec![], operations: vec![ @@ -205,8 +220,9 @@ mod parser_tests { ExtendedOpcode::Opcode(Opcode2D::Down), 
ExtendedOpcode::Opcode(Opcode2D::Down), ExtendedOpcode::Opcode(Opcode2D::Right), - ] - }])) + ], + }], + ) } #[test] @@ -219,382 +235,396 @@ mod parser_tests { #[test] fn strings_1() { - assert!(parse_program::( + _parser_test( r#" cell[5] ggghh = "hello"; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("ggghh"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 5), - location_specifier: LocationSpecifier::None - }, - value: Expression::StringLiteral(String::from("hello")) - }])); +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 5, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::StringLiteral(String::from("hello")), + }], + ); } #[test] fn strings_1a() { - assert_eq!( - parse_program::( - r#" + _parser_test( + r#" cell[0] ggghh = ""; -"# - ) - .unwrap_err(), - "" +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 0, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::StringLiteral(String::from("")), + }], ); } #[test] fn strings_1b() { - assert!(parse_program::( + _parser_test( r#" cell[1] ggghh = "hello"; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("ggghh"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 1), - location_specifier: LocationSpecifier::None - }, - value: Expression::StringLiteral(String::from("hello")) - }])); +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 1, + ), + location_specifier: 
LocationSpecifier::None, + }, + value: Expression::StringLiteral(String::from("hello")), + }], + ); } #[test] fn strings_2() { - assert!(parse_program::( + _parser_test( r#" cell[6] ggghh = "hel'lo"; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("ggghh"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 6), - location_specifier: LocationSpecifier::None - }, - value: Expression::StringLiteral(String::from("hel'lo")) - }])); +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 6, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::StringLiteral(String::from("hel'lo")), + }], + ); } #[test] fn strings_3() { - assert!(parse_program::( + _parser_test( r#" cell[7] ggghh = "\"hello\""; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("ggghh"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 7), - location_specifier: LocationSpecifier::None - }, - value: Expression::StringLiteral(String::from("\"hello\"")) - }])); +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 7, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::StringLiteral(String::from("\"hello\"")), + }], + ); } #[test] fn arrays_1() { - assert!(parse_program::( + _parser_test( r#" cell[0] ggghh = []; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("ggghh"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 0), - location_specifier: LocationSpecifier::None - }, - value: Expression::ArrayLiteral(vec![]) - }])); +"#, 
+ &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("ggghh"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 0, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::ArrayLiteral(vec![]), + }], + ); } #[test] fn arrays_2() { - assert!(parse_program::( + _parser_test( r#" cell[333] arr = [45, 53]; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("arr"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 333), - location_specifier: LocationSpecifier::None - }, - value: Expression::ArrayLiteral(vec![ - Expression::NaturalNumber(45), - Expression::NaturalNumber(53) - ]) - }])); +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 333, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(45), + Expression::NaturalNumber(53), + ]), + }], + ); } #[test] fn arrays_2a() { - assert!(parse_program::( + _parser_test( r#" cell[333] arr = [45 + 123, 53]; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("arr"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 333), - location_specifier: LocationSpecifier::None - }, - value: Expression::ArrayLiteral(vec![ - Expression::SumExpression { - sign: Sign::Positive, - summands: vec![ - Expression::NaturalNumber(45), - Expression::NaturalNumber(123) - ] +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 333, + ), + location_specifier: LocationSpecifier::None, }, - Expression::NaturalNumber(53) - ]) - }])); + value: 
Expression::ArrayLiteral(vec![ + Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::NaturalNumber(45), + Expression::NaturalNumber(123), + ], + }, + Expression::NaturalNumber(53), + ]), + }], + ); } #[test] fn arrays_2b() { - assert!(parse_program::( + _parser_test( r#" cell[333] arr = [45 + 123, -(53 + 0+78-9)]; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("arr"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 333), - location_specifier: LocationSpecifier::None - }, - value: Expression::ArrayLiteral(vec![ - Expression::SumExpression { - sign: Sign::Positive, - summands: vec![ - Expression::NaturalNumber(45), - Expression::NaturalNumber(123) - ] +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 333, + ), + location_specifier: LocationSpecifier::None, }, - Expression::SumExpression { - sign: Sign::Negative, - summands: vec![ - Expression::NaturalNumber(53), - Expression::NaturalNumber(0), - Expression::NaturalNumber(78), - Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::NaturalNumber(9)] - } - ] - } - ]) - }])); + value: Expression::ArrayLiteral(vec![ + Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::NaturalNumber(45), + Expression::NaturalNumber(123), + ], + }, + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![ + Expression::NaturalNumber(53), + Expression::NaturalNumber(0), + Expression::NaturalNumber(78), + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(9)], + }, + ], + }, + ]), + }], + ); } #[test] fn arrays_3() { - assert!(parse_program::( + _parser_test( r#" cell[3] arr = ['h', 53, (((4)))]; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: 
VariableTypeDefinition { - name: String::from("arr"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 3), - location_specifier: LocationSpecifier::None - }, - value: Expression::ArrayLiteral(vec![ - Expression::NaturalNumber(104), - Expression::NaturalNumber(53), - Expression::NaturalNumber(4) - ]) - }])); +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 3, + ), + location_specifier: LocationSpecifier::None, + }, + value: Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(104), + Expression::NaturalNumber(53), + Expression::NaturalNumber(4), + ]), + }], + ); } #[test] fn arrays_4() { - assert!(parse_program::( + _parser_test( r#" struct nonsense[39] arr @-56 = ["hello!", 53, [4,5,6]]; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("arr"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 39), - location_specifier: LocationSpecifier::Cell(-56) - }, - value: Expression::ArrayLiteral(vec![ - Expression::StringLiteral(String::from("hello!")), - Expression::NaturalNumber(53), - Expression::ArrayLiteral(vec![ - Expression::NaturalNumber(4), - Expression::NaturalNumber(5), - Expression::NaturalNumber(6) - ]) - ]) - }])); +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 39, + ), + location_specifier: LocationSpecifier::Cell(-56), + }, + value: Expression::ArrayLiteral(vec![ + Expression::StringLiteral(String::from("hello!")), + Expression::NaturalNumber(53), + Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(4), + Expression::NaturalNumber(5), + Expression::NaturalNumber(6), + ]), + ]), + }], + ); } #[test] fn arrays_5() { - assert!(parse_program::( + _parser_test( r#" 
struct nonsense[39] arr @-56 = ["hello!", ',', [4,"hello comma: ,",6]]; -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("arr"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 39), - location_specifier: LocationSpecifier::Cell(-56) - }, - value: Expression::ArrayLiteral(vec![ - Expression::StringLiteral(String::from("hello!")), - Expression::NaturalNumber(44), - Expression::ArrayLiteral(vec![ - Expression::NaturalNumber(4), - Expression::StringLiteral(String::from("hello comma: ,")), - Expression::NaturalNumber(6) - ]) - ]) - }])); +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 39, + ), + location_specifier: LocationSpecifier::Cell(-56), + }, + value: Expression::ArrayLiteral(vec![ + Expression::StringLiteral(String::from("hello!")), + Expression::NaturalNumber(44), + Expression::ArrayLiteral(vec![ + Expression::NaturalNumber(4), + Expression::StringLiteral(String::from("hello comma: ,")), + Expression::NaturalNumber(6), + ]), + ]), + }], + ); } #[test] fn sums_1() { - assert!(parse_program::( + _parser_test( r#" struct nonsense[39] arr @-56 = 56 - ( 4+3+( -7-5 +(6)-(((( (0) )))) ) ); -"# - ) - .unwrap() - .iter() - .eq(&[Clause::DefineVariable { - var: VariableTypeDefinition { - name: String::from("arr"), - var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 39), - location_specifier: LocationSpecifier::Cell(-56) - }, - value: Expression::SumExpression { - sign: Sign::Positive, - summands: vec![ - Expression::NaturalNumber(56), - Expression::SumExpression { - sign: Sign::Negative, - summands: vec![ - Expression::NaturalNumber(4), - Expression::NaturalNumber(3), - Expression::SumExpression { - sign: Sign::Positive, - summands: vec![ - Expression::SumExpression { - sign: Sign::Negative, - summands: 
vec![Expression::NaturalNumber(7)] - }, - Expression::SumExpression { - sign: Sign::Negative, - summands: vec![Expression::NaturalNumber(5)] - }, - Expression::NaturalNumber(6) - ] - } - ] - } - ] - } - }])); +"#, + &[Clause::DefineVariable { + var: VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array( + Box::new(VariableTypeReference::Cell), + 39, + ), + location_specifier: LocationSpecifier::Cell(-56), + }, + value: Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::NaturalNumber(56), + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![ + Expression::NaturalNumber(4), + Expression::NaturalNumber(3), + Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(7)], + }, + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(5)], + }, + Expression::NaturalNumber(6), + ], + }, + ], + }, + ], + }, + }], + ); } #[test] fn blocks_1() { - assert!(parse_program::("{}") - .unwrap() - .iter() - .eq(&[Clause::Block(vec![])])); + _parser_test("{}", &[Clause::Block(vec![])]); } #[test] fn blocks_1a() { - assert!(parse_program::(" {}{} {} {} ") - .unwrap() - .iter() - .eq(&[ + _parser_test( + " {}{} {} {} ", + &[ + Clause::Block(vec![]), Clause::Block(vec![]), Clause::Block(vec![]), Clause::Block(vec![]), - Clause::Block(vec![]) - ])); + ], + ); } #[test] fn blocks_1b() { - assert!(parse_program::(" {}{{{{}}{}}} {} {} ") - .unwrap() - .iter() - .eq(&[ + _parser_test( + " {}{{{{}}{}}} {} {} ", + &[ Clause::Block(vec![]), Clause::Block(vec![Clause::Block(vec![ Clause::Block(vec![Clause::Block(vec![])]), - Clause::Block(vec![]) + Clause::Block(vec![]), ])]), Clause::Block(vec![]), - Clause::Block(vec![]) - ])); + Clause::Block(vec![]), + ], + ); } #[test] fn blocks_2() { - assert!( - parse_program::("{output 1;output 2;}{{{} output 3;}}") - 
.unwrap() - .iter() - .eq(&[ - Clause::Block(vec![ - Clause::OutputValue { - value: Expression::NaturalNumber(1), - }, - Clause::OutputValue { - value: Expression::NaturalNumber(2), - } - ]), - Clause::Block(vec![Clause::Block(vec![ - Clause::Block(vec![]), - Clause::OutputValue { - value: Expression::NaturalNumber(3) - } - ])]) - ]) + _parser_test( + "{output 1;output 2;}{{{} output 3;}}", + &[ + Clause::Block(vec![ + Clause::Output { + value: Expression::NaturalNumber(1), + }, + Clause::Output { + value: Expression::NaturalNumber(2), + }, + ]), + Clause::Block(vec![Clause::Block(vec![ + Clause::Block(vec![]), + Clause::Output { + value: Expression::NaturalNumber(3), + }, + ])]), + ], ); } } diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index f89c7a8..305d53e 100644 --- a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -1,53 +1,9 @@ // TODO: make an impl for a tokeniser, inverse-builder pattern? // have a function to peek, then accept changes, so we don't double hangle tokens -use crate::macros::macros::r_panic; +use std::fmt::Display; -#[derive(Debug, Clone, PartialEq)] -pub enum Token { - None, - Output, - Input, - Fn, - Cell, - Struct, - While, - If, - Not, - Else, - Copy, - Drain, - Into, - Bf, - Clobbers, - Assert, - Equals, - Unknown, - True, - False, - LeftBrace, - RightBrace, - LeftSquareBracket, - RightSquareBracket, - LeftParenthesis, - RightParenthesis, - Comma, - Dot, - Asterisk, - At, - Plus, - Minus, - EqualsSign, - Semicolon, - PlusPlus, - MinusMinus, - PlusEquals, - MinusEquals, - Name(String), - Number(usize), - String(String), - Character(char), -} +use crate::macros::macros::r_panic; /// Get the next token from chars, advance the passed in pointer pub fn next_token(chars: &mut &[char]) -> Result { @@ -318,6 +274,140 @@ fn parse_string_literal(chars: &mut &[char]) -> Result { Ok(parsed_string) } +#[derive(Debug, Clone, PartialEq)] +pub enum Token { + None, + Output, + Input, + Fn, + 
Cell, + Struct, + While, + If, + Not, + Else, + Copy, + Drain, + Into, + Bf, + Clobbers, + Assert, + Equals, + Unknown, + True, + False, + LeftBrace, + RightBrace, + LeftSquareBracket, + RightSquareBracket, + LeftParenthesis, + RightParenthesis, + Comma, + Dot, + Asterisk, + At, + Plus, + Minus, + EqualsSign, + Semicolon, + PlusPlus, + MinusMinus, + PlusEquals, + MinusEquals, + Name(String), + Number(usize), + String(String), + Character(char), +} + +impl Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Token::Output + | Token::Input + | Token::Fn + | Token::Cell + | Token::Struct + | Token::While + | Token::If + | Token::Not + | Token::Else + | Token::Copy + | Token::Drain + | Token::Into + | Token::Bf + | Token::Clobbers + | Token::Assert + | Token::Equals + | Token::Unknown + | Token::True + | Token::False + | Token::LeftBrace + | Token::RightBrace + | Token::LeftSquareBracket + | Token::RightSquareBracket + | Token::LeftParenthesis + | Token::RightParenthesis + | Token::Comma + | Token::Dot + | Token::Asterisk + | Token::At + | Token::Plus + | Token::Minus + | Token::EqualsSign + | Token::Semicolon + | Token::PlusPlus + | Token::MinusMinus + | Token::PlusEquals + | Token::MinusEquals => f.write_str(match self { + Token::Output => "output", + Token::Input => "input", + Token::Fn => "fn", + Token::Cell => "cell", + Token::Struct => "struct", + Token::While => "while", + Token::If => "if", + Token::Not => "not", + Token::Else => "else", + Token::Copy => "copy", + Token::Drain => "drain", + Token::Into => "into", + Token::Bf => "bf", + Token::Clobbers => "clobbers", + Token::Assert => "assert", + Token::Equals => "equals", + Token::Unknown => "unknown", + Token::True => "true", + Token::False => "false", + Token::LeftBrace => "{", + Token::RightBrace => "}", + Token::LeftSquareBracket => "[", + Token::RightSquareBracket => "]", + Token::LeftParenthesis => "(", + Token::RightParenthesis => ")", + 
Token::Comma => ",", + Token::Dot => ".", + Token::Asterisk => "*", + Token::At => "@", + Token::Plus => "+", + Token::Minus => "-", + Token::EqualsSign => "=", + Token::Semicolon => ";", + Token::PlusPlus => "++", + Token::MinusMinus => "--", + Token::PlusEquals => "+=", + Token::MinusEquals => "-=", + _ => unreachable!(), + }), + Token::Name(name) => f.write_str(name), + Token::Number(number) => f.write_fmt(format_args!("{number}")), + Token::String(s) => f.write_fmt(format_args!("\"{s}\"")), + Token::Character(c) => f.write_fmt(format_args!("'{c}'")), + Token::None => Ok(()), + } + } +} + #[cfg(test)] mod tokeniser_tests { use super::*; diff --git a/compiler/src/parser/types.rs b/compiler/src/parser/types.rs index 5961e81..40cd81d 100644 --- a/compiler/src/parser/types.rs +++ b/compiler/src/parser/types.rs @@ -47,10 +47,10 @@ pub enum Clause { var: VariableTarget, block: Vec>, }, - OutputValue { + Output { value: Expression, }, - InputVariable { + Input { var: VariableTarget, }, DefineFunction { From 9a6ebc5450c6e492ae0c17926d365afbca4edd4f Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 15:40:40 +1100 Subject: [PATCH 39/56] Fix incorrect parser tests --- compiler/src/parser/parser.rs | 8 ++++++-- compiler/src/parser/tests.rs | 14 ++++++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 45f5aa0..f712571 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -108,7 +108,9 @@ impl TapeCellLocation for TapeCell { // variable location specifier: Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), // TODO: add source snippet - _ => r_panic!("Invalid location specifier.",), + token => r_panic!( + "Unexpected token `{token}` found while parsing location specifier. 
(is 2D mode turned on?)" + ), } } @@ -141,7 +143,9 @@ impl TapeCellLocation for TapeCell2D { // variable location specifier: Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), // TODO: add source snippet - _ => r_panic!("Invalid location specifier."), + token => { + r_panic!("Unexpected token `{token}` found while parsing 2D location specifier.") + } } } diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs index fc3c826..9ea5fef 100644 --- a/compiler/src/parser/tests.rs +++ b/compiler/src/parser/tests.rs @@ -1,5 +1,3 @@ -use crate::parser::parser::parse_program; - #[cfg(test)] mod parser_tests { use super::super::{ @@ -85,7 +83,7 @@ mod parser_tests { fn two_dimensional_1() { assert_eq!( parse_program::("cell x @(0, 1);").unwrap_err(), - "Invalid location specifier @(0, 1)" + "Unexpected token `(` found while parsing location specifier. (is 2D mode turned on?)" ); } @@ -479,7 +477,7 @@ struct nonsense[39] arr @-56 = ["hello!", 53, [4,5,6]]; var: VariableTypeDefinition { name: String::from("arr"), var_type: VariableTypeReference::Array( - Box::new(VariableTypeReference::Cell), + Box::new(VariableTypeReference::Struct(String::from("nonsense"))), 39, ), location_specifier: LocationSpecifier::Cell(-56), @@ -507,7 +505,7 @@ struct nonsense[39] arr @-56 = ["hello!", ',', [4,"hello comma: ,",6]]; var: VariableTypeDefinition { name: String::from("arr"), var_type: VariableTypeReference::Array( - Box::new(VariableTypeReference::Cell), + Box::new(VariableTypeReference::Struct(String::from("nonsense"))), 39, ), location_specifier: LocationSpecifier::Cell(-56), @@ -535,7 +533,7 @@ struct nonsense[39] arr @-56 = 56 - ( 4+3+( -7-5 +(6)-(((( (0) )))) ) ); var: VariableTypeDefinition { name: String::from("arr"), var_type: VariableTypeReference::Array( - Box::new(VariableTypeReference::Cell), + Box::new(VariableTypeReference::Struct(String::from("nonsense"))), 39, ), location_specifier: LocationSpecifier::Cell(-56), @@ -561,6 +559,10 @@ 
struct nonsense[39] arr @-56 = 56 - ( 4+3+( -7-5 +(6)-(((( (0) )))) ) ); summands: vec![Expression::NaturalNumber(5)], }, Expression::NaturalNumber(6), + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![Expression::NaturalNumber(0)], + }, ], }, ], From 7a4c54fb93f8831165791b1e407271847e50e118 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 17:18:26 +1100 Subject: [PATCH 40/56] Implement drain/copy loop parsing --- compiler/src/frontend.rs | 28 +++-- compiler/src/parser/expressions.rs | 49 +++++++- compiler/src/parser/old_parser.rs | 170 --------------------------- compiler/src/parser/old_tokeniser.rs | 144 ----------------------- compiler/src/parser/parser.rs | 140 ++++++++++++++++++---- compiler/src/parser/tests.rs | 10 +- compiler/src/parser/types.rs | 11 +- 7 files changed, 193 insertions(+), 359 deletions(-) delete mode 100644 compiler/src/parser/old_tokeniser.rs diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index 309f944..989df7b 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -155,7 +155,7 @@ impl MastermindContext { } } } - Clause::SetVariable { + Clause::Assign { var, value, self_referencing, @@ -176,7 +176,7 @@ impl MastermindContext { // etc... 
} }, - Clause::AddToVariable { + Clause::AddAssign { var, value, self_referencing, @@ -331,17 +331,19 @@ impl MastermindContext { // close the loop scope.push_instruction(Instruction::CloseLoop(cell)); } - Clause::CopyLoop { + Clause::DrainLoop { source, targets, block, - is_draining, + is_copying, } => { // TODO: refactor this, there is duplicate code with copying the source value cell - let (source_cell, free_source_cell) = match (is_draining, &source) { + let (source_cell, free_source_cell) = match (is_copying, &source) { // draining loops can drain from an expression or a variable - (true, Expression::VariableReference(var)) => (scope.get_cell(var)?, false), - (true, _) => { + (false, Expression::VariableReference(var)) => { + (scope.get_cell(var)?, false) + } + (false, _) => { // any other kind of expression, allocate memory for it automatically let id = scope.push_memory_id(); scope @@ -353,7 +355,7 @@ impl MastermindContext { scope._add_expr_to_cell(&source, new_cell)?; (new_cell, true) } - (false, Expression::VariableReference(var)) => { + (true, Expression::VariableReference(var)) => { let cell = scope.get_cell(var)?; let new_mem_id = scope.push_memory_id(); @@ -371,16 +373,18 @@ impl MastermindContext { (new_cell, true) } - (false, _) => { + (true, _) => { r_panic!("Cannot copy from {source:#?}, use a drain loop instead") } }; scope.push_instruction(Instruction::OpenLoop(source_cell)); // recurse - let loop_scope = self.create_ir_scope(&block, Some(&scope))?; - // TODO: refactor, make a function in scope trait to do this automatically - scope.instructions.extend(loop_scope.build_ir(true)); + if let Some(block) = block { + let loop_scope = self.create_ir_scope(&block, Some(&scope))?; + // TODO: refactor, make a function in scope trait to do this automatically + scope.instructions.extend(loop_scope.build_ir(true)); + } // copy into each target and decrement the source for target in targets { diff --git a/compiler/src/parser/expressions.rs 
b/compiler/src/parser/expressions.rs index e0c4100..3d4bb58 100644 --- a/compiler/src/parser/expressions.rs +++ b/compiler/src/parser/expressions.rs @@ -5,10 +5,10 @@ use super::{ }; use crate::macros::macros::{r_assert, r_panic}; -use std::num::Wrapping; +use itertools::Itertools; +use std::{fmt::Display, num::Wrapping}; -// TODO: add multiplication -// yes, but no variable * variable multiplication or division +// TODO: simplify expression data structure for negative sums of single values #[derive(Debug, Clone)] #[cfg_attr(test, derive(PartialEq))] pub enum Expression { @@ -322,3 +322,46 @@ impl Expression { } } } + +// TODO: test expression display code? +impl Display for Expression { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Expression::SumExpression { sign, summands } => { + if let Sign::Negative = sign { + f.write_str("- ")?; + } + f.write_str("(")?; + + let mut summands_iter = summands.iter(); + // TODO: refactor to remove the need for this + if let Some(first_expr) = summands_iter.next() { + f.write_fmt(format_args!("{first_expr}")); + for expr in summands_iter { + f.write_str(" ")?; + match expr { + Expression::SumExpression { + sign: Sign::Negative, + summands: _, + } => (), + _ => f.write_str("+ ")?, + } + f.write_fmt(format_args!("{expr}"))?; + } + } + + f.write_str(")")?; + } + Expression::NaturalNumber(number) => f.write_fmt(format_args!("{number}"))?, + Expression::VariableReference(variable_target) => { + f.write_fmt(format_args!("{variable_target}"))? 
+ } + Expression::ArrayLiteral(expressions) => { + f.write_fmt(format_args!("[{}]", expressions.iter().join(", ")))?; + } + Expression::StringLiteral(s) => f.write_fmt(format_args!("\"{s}\""))?, + } + + Ok(()) + } +} diff --git a/compiler/src/parser/old_parser.rs b/compiler/src/parser/old_parser.rs index 814fed5..9abb8ca 100644 --- a/compiler/src/parser/old_parser.rs +++ b/compiler/src/parser/old_parser.rs @@ -100,40 +100,6 @@ pub fn parse_clause_from_tokens( }) } -fn parse_add_clause(clause: &[Token]) -> Result>, String> { - let mut clauses = Vec::new(); - let mut i = 0usize; - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - let positive = match &clause[i] { - Token::Plus => true, - Token::Minus => false, - _ => { - r_panic!("Unexpected second token in add clause: {clause:#?}"); - } - }; - i += 2; // assume the equals sign is there because it was checked by the main loop - let raw_expr = Expression::parse(&clause[i..(clause.len() - 1)])?; - let expr = match positive { - true => raw_expr, - false => Expression::SumExpression { - sign: Sign::Negative, - summands: vec![raw_expr], - }, - }; - //Check if this add clause self references - let self_referencing = expr.check_self_referencing(&var); - - clauses.push(Clause::AddToVariable { - var, - value: expr, - self_referencing: self_referencing, - }); - - Ok(clauses) -} - // currently just syntax sugar, should make it actually do post/pre increments fn parse_increment_clause(clause: &[Token]) -> Result, String> { let (var, _) = parse_var_target(&clause[2..])?; @@ -200,73 +166,6 @@ fn parse_set_clause(clause: &[Token]) -> Result, String> { }) } -fn parse_drain_copy_clause( - clause: &[Token], - is_draining: bool, - block: Vec>, -) -> Result, String> { - // drain g {i += 1;}; - // drain g into j; - // copy foo into bar {g += 2; etc;}; - // TODO: make a tuple-parsing function and use it here instead of a space seperated list of targets - - let mut targets = Vec::new(); - let mut i = 1usize; - - let 
condition_start_token = i; - - i += 1; - while let Some(token) = clause.get(i) { - if let Token::Into | Token::OpenBrace | Token::Semicolon = token { - break; - } - i += 1; - } - r_assert!( - i < clause.len(), - "Expected source expression in draining/copying loop: {clause:#?}" - ); - - let source = Expression::parse(&clause[condition_start_token..i])?; - - if let Token::Into = &clause[i] { - // simple drain/copy move operations - i += 1; - - loop { - match &clause[i] { - Token::Name(_) | Token::Asterisk => { - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - targets.push(var); - } - Token::OpenBrace | Token::Semicolon => { - break; - } - _ => { - r_panic!("Unexpected token in drain clause: {clause:#?}"); - } - } - } - } - - // TODO: fix ordering of blocks in new parser, may have to rewrite the whole parser to use &[char] - // if let Token::OpenBrace = &clause[i] { - // // code block to execute at each loop iteration - // let braced_tokens = get_braced_tokens(&clause[i..], BRACES)?; - // // recursion - // block.extend(parse(braced_tokens)?); - // // i += 2 + braced_tokens.len(); - // } - - Ok(Clause::CopyLoop { - source, - targets, - block, - is_draining, - }) -} - fn parse_if_else_clause( clause: &[Token], ) -> Result, String> { @@ -536,72 +435,3 @@ fn parse_function_call_clause(clause: &[Token]) -> Result, St arguments: args, }) } - -/// get a clause's tokens, typically a line, bounded by ; -fn get_clause_tokens(tokens: &[Token]) -> Result, String> { - if tokens.len() == 0 { - Ok(None) - } else { - let mut i = 0usize; - while i < tokens.len() { - match tokens[i] { - Token::OpenBrace => { - let braced_block = get_braced_tokens(&tokens[i..], BRACES)?; - i += 2 + braced_block.len(); - // handle blocks marking the end of clauses, if/else being the exception - if i < tokens.len() { - if let Token::Else = tokens[i] { - i += 1; - let else_block = get_braced_tokens(&tokens[i..], BRACES)?; - i += 2 + else_block.len(); - } - } - return 
Ok(Some(&tokens[..i])); - } - Token::Semicolon => { - i += 1; - return Ok(Some(&tokens[..i])); - } - _ => { - i += 1; - } - } - } - - r_panic!("No clause could be found in: {tokens:#?}"); - } -} - -const SQUARE_BRACKETS: (Token, Token) = (Token::OpenSquareBracket, Token::ClosingSquareBracket); -const BRACES: (Token, Token) = (Token::OpenBrace, Token::ClosingBrace); -const PARENTHESES: (Token, Token) = (Token::OpenParenthesis, Token::ClosingParenthesis); -const ANGLED_BRACKETS: (Token, Token) = (Token::LessThan, Token::MoreThan); -// this should be a generic function but rust doesn't support enum variants as type arguments yet -// find tokens bounded by matching brackets -// TODO: make an impl for &[Token] and put all these functions in it -fn get_braced_tokens(tokens: &[Token], braces: (Token, Token)) -> Result<&[Token], String> { - let (open_brace, closing_brace) = (discriminant(&braces.0), discriminant(&braces.1)); - // find corresponding bracket, the depth check is unnecessary but whatever - let len = { - let mut i = 1usize; - let mut depth = 1; - while i < tokens.len() && depth > 0 { - let g = discriminant(&tokens[i]); - if g == open_brace { - depth += 1; - } else if g == closing_brace { - depth -= 1; - } - i += 1; - } - i - }; - - if len >= 2 { - if open_brace == discriminant(&tokens[0]) && closing_brace == discriminant(&tokens[len - 1]) - { - return Ok(&tokens[1..(len - 1)]); - } - } - r_panic!("Invalid braced tokens: {tokens:#?}"); -} diff --git a/compiler/src/parser/old_tokeniser.rs b/compiler/src/parser/old_tokeniser.rs deleted file mode 100644 index 517432e..0000000 --- a/compiler/src/parser/old_tokeniser.rs +++ /dev/null @@ -1,144 +0,0 @@ -// project dependencies: -use crate::macros::macros::{r_assert, r_panic}; - -// external dependencies: -use regex_lite::Regex; - -// TODO: refactor: combine tokeniser and parser into one -// make the inline brainfuck tokens contextual -pub fn tokenise(source: &str) -> Result, String> { - let stripped = source - 
.lines() - .map(strip_line) - .collect::>() - .join(" "); - - // mappings are a list of key * value tuples because we are doing "starts with" searches, - // meaning we can't look up in a hashtable - let mappings = [ - // (" ", Token::None), - (";", Token::Semicolon), - ("output", Token::Output), - ("input", Token::Input), - ("cell", Token::Cell), - ("struct", Token::Struct), - ("=", Token::EqualsSign), - ("while", Token::While), - ("drain", Token::Drain), - ("into", Token::Into), - ("else", Token::Else), - ("copy", Token::Copy), - ("bf", Token::Bf), - ("clobbers", Token::Clobbers), - ("assert", Token::Assert), - ("equals", Token::Equals), - ("unknown", Token::Unknown), - ("fn", Token::Fn), - ("if", Token::If), - ("not", Token::Not), - ("else", Token::Else), - ("{", Token::OpenBrace), - ("}", Token::ClosingBrace), - ("[", Token::OpenSquareBracket), - ("]", Token::ClosingSquareBracket), - ("(", Token::OpenParenthesis), - (")", Token::ClosingParenthesis), - ("<", Token::LessThan), - (">", Token::MoreThan), - ("^", Token::Caret), - ("true", Token::True), - ("false", Token::False), - (",", Token::Comma), - (".", Token::Dot), - ("*", Token::Asterisk), - ("@", Token::At), - ("-", Token::Minus), - ("+", Token::Plus), - ]; - // check for numbers and variables - let number_regex = Regex::new(r#"^[0-9]+"#).unwrap(); - let name_regex = Regex::new(r#"^[a-zA-Z_]\w*"#).unwrap(); - let string_regex = Regex::new(r#"^"(?:[^"\\]|\\.)*""#).unwrap(); - let character_regex = Regex::new(r#"^'(?:[^'\\]|\\.)'"#).unwrap(); - - let mut tokens: Vec = Vec::new(); - - let mut chr_idx = 0usize; - while chr_idx < stripped.len() { - let remaining = &stripped[chr_idx..]; - - if let Some(raw) = number_regex - .captures(remaining) - .map(|num_capture| String::from(&num_capture[0])) - { - chr_idx += raw.len(); - tokens.push(Token::Digits(raw)); - } else if let Some(raw) = name_regex - .captures(remaining) - .map(|name_capture| String::from(&name_capture[0])) - .take_if(|raw| { - mappings - .iter() - 
.find(|(keyword, _)| raw == *keyword) - .is_none() - }) { - chr_idx += raw.len(); - tokens.push(Token::Name(raw)); - } else if let Some(raw) = string_regex - .captures(remaining) - .map(|str_capture| String::from(&str_capture[0])) - { - chr_idx += raw.len(); - r_assert!( - raw.len() >= 2, - "Not enough characters in string literal token, \ -this should never occur. {raw:#?}" - ); - tokens.push(Token::String(tokenise_raw_string_literal( - &raw[1..(raw.len() - 1)], - )?)); - } else if let Some(raw) = character_regex - .captures(remaining) - .map(|chr_capture| String::from(&chr_capture[0])) - { - chr_idx += raw.len(); - r_assert!( - raw.len() >= 2, - "Not enough characters in character literal token, \ -this should never occur. {raw:#?}" - ); - tokens.push(Token::Character(tokenise_raw_character_literal( - &raw[1..(raw.len() - 1)], - )?)); - } else if let Some((text, token)) = mappings - .iter() - .find(|(text, _)| remaining.starts_with(text)) - { - tokens.push(token.clone()); - chr_idx += (*text).len(); - } else { - r_panic!("Unknown token found while tokenising program: \"{remaining}\""); - } - } - - Ok(tokens - .into_iter() - .filter(|t| !matches!(t, Token::None)) - .collect()) -} - -fn strip_line(line: &str) -> String { - let mut stripped = line; - // remove comments - let split = line.split_once("//"); - if let Some((one, _comment)) = split { - stripped = one; - } - - // remove excess whitespace - stripped - .trim() - .split_whitespace() - .collect::>() - .join(" ") -} diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index f712571..62945dc 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -7,16 +7,12 @@ use super::{ }, }; use crate::{ - backend::{ - bf::TapeCell, - bf2d::TapeCell2D, - common::{OpcodeVariant, TapeCellVariant}, - }, + backend::{bf::TapeCell, bf2d::TapeCell2D, common::OpcodeVariant}, macros::macros::{r_assert, r_panic}, parser::types::VariableTypeDefinition, }; -pub fn parse_program( +pub fn 
parse_program( raw: &str, ) -> Result>, String> { let program_chars: Vec = raw.chars().collect(); @@ -33,7 +29,7 @@ pub fn parse_program( Ok(clauses) } -fn parse_clause( +fn parse_clause( chars: &mut &[char], ) -> Result, String> { let mut s = *chars; @@ -41,6 +37,7 @@ fn parse_clause( Token::None => Clause::None, Token::LeftBrace => Clause::Block(parse_block_clauses(chars)?), Token::Output => parse_output_clause(chars)?, + Token::Input => parse_input_clause(chars)?, Token::If => parse_if_else_clause(chars)?, Token::While => parse_while_clause(chars)?, Token::Fn => parse_function_definition_clause(chars)?, @@ -57,13 +54,14 @@ fn parse_clause( Token::Cell => parse_let_clause(chars)?, Token::Name(_) => match next_token(&mut s)? { Token::LeftParenthesis => todo!(), - _ => todo!(), + _ => parse_assign_clause(chars)?, }, - token => r_panic!("Invalid starting token: `{token}`"), + Token::Drain | Token::Copy => parse_drain_copy_clause(chars)?, + token => r_panic!("Invalid starting token `{token}`."), }) } -fn parse_block_clauses( +fn parse_block_clauses( chars: &mut &[char], ) -> Result>, String> { let Token::LeftBrace = next_token(chars)? else { @@ -109,7 +107,7 @@ impl TapeCellLocation for TapeCell { Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), // TODO: add source snippet token => r_panic!( - "Unexpected token `{token}` found while parsing location specifier. (is 2D mode turned on?)" + "Unexpected `{token}` found while parsing location specifier. 
(is 2D mode turned on?)" ), } } @@ -144,7 +142,7 @@ impl TapeCellLocation for TapeCell2D { Token::Name(_) => Ok(LocationSpecifier::Variable(parse_var_target(chars)?)), // TODO: add source snippet token => { - r_panic!("Unexpected token `{token}` found while parsing 2D location specifier.") + r_panic!("Unexpected `{token}` found while parsing 2D location specifier.") } } } @@ -286,7 +284,7 @@ fn parse_integer(chars: &mut &[char]) -> Result { fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; LENGTH], String> { let Token::LeftParenthesis = next_token(chars)? else { // TODO: add source snippet - r_panic!("Expected opening parenthesis in tuple.") + r_panic!("Expected opening parenthesis in {LENGTH}-tuple.") }; let mut tuple = [0; LENGTH]; @@ -296,13 +294,13 @@ fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; if j < LENGTH - 1 { let Token::Comma = next_token(chars)? else { // TODO: add source snippet - r_panic!("Expected comma in tuple."); + r_panic!("Expected comma in {LENGTH}-tuple."); }; } } let Token::RightParenthesis = next_token(chars)? else { // TODO: add source snippet - r_panic!("Expected closing parenthesis in tuple."); + r_panic!("Expected closing parenthesis in {LENGTH}-tuple."); }; Ok(tuple) @@ -312,7 +310,7 @@ fn parse_integer_tuple(chars: &mut &[char]) -> Result<[i32; //////////////////////////// //////////////////////////// -fn parse_if_else_clause( +fn parse_if_else_clause( chars: &mut &[char], ) -> Result, String> { let Token::If = next_token(chars)? else { @@ -370,7 +368,7 @@ fn parse_if_else_clause( }) } -fn parse_while_clause( +fn parse_while_clause( chars: &mut &[char], ) -> Result, String> { let Token::While = next_token(chars)? else { @@ -398,7 +396,7 @@ fn parse_while_clause( }) } -fn parse_function_definition_clause( +fn parse_function_definition_clause( chars: &mut &[char], ) -> Result, String> { let Token::Fn = next_token(chars)? 
else { @@ -502,11 +500,113 @@ fn parse_output_clause(chars: &mut &[char]) -> Result, String r_panic!("Expected keyword `output` in output clause."); }; - let value = Expression::parse(chars)?; + let expr = Expression::parse(chars)?; let Token::Semicolon = next_token(chars)? else { r_panic!("Expected semicolon at end of output clause."); }; - Ok(Clause::Output { value }) + Ok(Clause::Output { value: expr }) +} + +fn parse_input_clause(chars: &mut &[char]) -> Result, String> { + let Token::Input = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected keyword `input` in input clause."); + }; + + let var = parse_var_target(chars)?; + + let Token::Semicolon = next_token(chars)? else { + r_panic!("Expected semicolon at end of input clause."); + }; + + Ok(Clause::Input { var }) +} + +fn parse_assign_clause(chars: &mut &[char]) -> Result, String> { + let var = parse_var_target(chars)?; + + let operator = next_token(chars)?; + match operator { + Token::EqualsSign | Token::PlusEquals | Token::MinusEquals => (), + token => r_panic!("Unexpected `{token}` in assignment clause."), + } + + let expr = Expression::parse(chars)?; + + // TODO: refactor this, at the very least make it nuanced per-cell, as this was added before subfields were added + let self_referencing = expr.check_self_referencing(&var); + + Ok(match operator { + Token::EqualsSign => Clause::Assign { + var, + value: expr, + self_referencing, + }, + Token::PlusEquals => Clause::AddAssign { + var, + value: expr, + self_referencing, + }, + Token::MinusEquals => Clause::AddAssign { + var, + value: expr.flipped_sign()?, + self_referencing, + }, + _ => unreachable!(), + }) +} + +/// parse a drain/copy loop: +/// `drain g {i += 1;};` +/// `drain g into j;` +/// `copy foo into bar {g += 2; etc;};` +fn parse_drain_copy_clause( + chars: &mut &[char], +) -> Result, String> { + let is_copying = match next_token(chars)? 
{ + Token::Copy => true, + Token::Drain => false, + token => r_panic!("Unexpected `{token}` in drain/copy clause."), + }; + + let source = Expression::parse(chars)?; + + let mut targets = Vec::new(); + { + let mut s = *chars; + if let Token::Into = next_token(&mut s)? { + *chars = s; + loop { + // parse var target before delimiters because into must precede a target + targets.push(parse_var_target(chars)?); + { + let mut s = *chars; + if let Token::LeftBrace | Token::Semicolon = next_token(&mut s)? { + break; + } + } + } + } + } + + let block = { + let mut s = *chars; + match next_token(&mut s)? { + Token::LeftBrace => Some(parse_block_clauses(chars)?), + Token::Semicolon => { + *chars = s; + None + } + token => r_panic!("Unexpected `{token}` in drain/copy clause."), + } + }; + + Ok(Clause::DrainLoop { + source, + targets, + block, + is_copying, + }) } diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs index 9ea5fef..4d79bcc 100644 --- a/compiler/src/parser/tests.rs +++ b/compiler/src/parser/tests.rs @@ -36,7 +36,7 @@ mod parser_tests { fn end_tokens_1() { assert_eq!( parse_program::("clobbers").unwrap_err(), - "Invalid starting token: `clobbers`" + "Invalid starting token `clobbers`." ); } @@ -44,15 +44,15 @@ mod parser_tests { fn end_tokens_2() { assert_eq!( parse_program::(";").unwrap_err(), - "Invalid starting token: `;`" + "Invalid starting token `;`." ); assert_eq!( parse_program::(";;").unwrap_err(), - "Invalid starting token: `;`" + "Invalid starting token `;`." ); assert_eq!( parse_program::(";;;").unwrap_err(), - "Invalid starting token: `;`" + "Invalid starting token `;`." ); } @@ -83,7 +83,7 @@ mod parser_tests { fn two_dimensional_1() { assert_eq!( parse_program::("cell x @(0, 1);").unwrap_err(), - "Unexpected token `(` found while parsing location specifier. (is 2D mode turned on?)" + "Unexpected `(` found while parsing location specifier. 
(is 2D mode turned on?)" ); } diff --git a/compiler/src/parser/types.rs b/compiler/src/parser/types.rs index 40cd81d..89694d7 100644 --- a/compiler/src/parser/types.rs +++ b/compiler/src/parser/types.rs @@ -21,12 +21,12 @@ pub enum Clause { name: String, fields: Vec, }, - AddToVariable { + AddAssign { var: VariableTarget, value: Expression, self_referencing: bool, }, - SetVariable { + Assign { var: VariableTarget, value: Expression, self_referencing: bool, @@ -37,11 +37,12 @@ pub enum Clause { // typically will either be used for assert unknown or assert 0 value: Option, }, - CopyLoop { + DrainLoop { source: Expression, targets: Vec, - block: Vec>, - is_draining: bool, + block: Option>>, + // TODO: reassess this syntax + is_copying: bool, }, While { var: VariableTarget, From c375a0a3e5fe2ef083e38903dd837a2ef6020055 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Mon, 10 Nov 2025 17:20:40 +1100 Subject: [PATCH 41/56] Fix parsing bug with assignments --- compiler/src/parser/parser.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 62945dc..94b1469 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -538,6 +538,18 @@ fn parse_assign_clause(chars: &mut &[char]) -> Result, String // TODO: refactor this, at the very least make it nuanced per-cell, as this was added before subfields were added let self_referencing = expr.check_self_referencing(&var); + let Token::Semicolon = next_token(chars)? 
else { + r_panic!( + "Expected semicolon at end of {} clause.", + match operator { + Token::EqualsSign => "assignment", + Token::PlusEquals => "add-assignment", + Token::MinusEquals => "subtract-assignment", + _ => unreachable!(), + } + ); + }; + Ok(match operator { Token::EqualsSign => Clause::Assign { var, From fec9257ea7be0b7b839fac0c888f63bfe07fc117 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Tue, 11 Nov 2025 12:11:59 +1100 Subject: [PATCH 42/56] Fix loop issues in parser --- compiler/src/parser/expressions.rs | 4 ++- compiler/src/parser/parser.rs | 54 ++++++++++++++-------------- compiler/src/parser/tests.rs | 58 +++++++++++++++++++++--------- compiler/src/tests.rs | 49 ++++++++++++++++++------- 4 files changed, 108 insertions(+), 57 deletions(-) diff --git a/compiler/src/parser/expressions.rs b/compiler/src/parser/expressions.rs index 3d4bb58..3cf3201 100644 --- a/compiler/src/parser/expressions.rs +++ b/compiler/src/parser/expressions.rs @@ -51,6 +51,7 @@ impl Expression { | Token::Comma | Token::LeftBrace | Token::Into + | Token::RightSquareBracket | Token::None) = next_token(&mut s)? 
else { // TODO: add source snippet @@ -90,6 +91,7 @@ impl Expression { | Token::Comma | Token::RightParenthesis | Token::RightSquareBracket + | Token::Into | Token::None, ) = next_token(&mut s) else { @@ -336,7 +338,7 @@ impl Display for Expression { let mut summands_iter = summands.iter(); // TODO: refactor to remove the need for this if let Some(first_expr) = summands_iter.next() { - f.write_fmt(format_args!("{first_expr}")); + f.write_fmt(format_args!("{first_expr}"))?; for expr in summands_iter { f.write_str(" ")?; match expr { diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 94b1469..72ccdff 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -18,11 +18,7 @@ pub fn parse_program( let program_chars: Vec = raw.chars().collect(); let mut chars_slice = &program_chars[..]; let mut clauses = vec![]; - loop { - let clause = parse_clause(&mut chars_slice)?; - if let Clause::None = clause { - break; - } + while let Some(clause) = parse_clause(&mut chars_slice)? { clauses.push(clause); } @@ -31,32 +27,36 @@ pub fn parse_program( fn parse_clause( chars: &mut &[char], -) -> Result, String> { +) -> Result>, String> { let mut s = *chars; Ok(match next_token(&mut s)? 
{ - Token::None => Clause::None, - Token::LeftBrace => Clause::Block(parse_block_clauses(chars)?), - Token::Output => parse_output_clause(chars)?, - Token::Input => parse_input_clause(chars)?, - Token::If => parse_if_else_clause(chars)?, - Token::While => parse_while_clause(chars)?, - Token::Fn => parse_function_definition_clause(chars)?, + Token::None => None, + Token::Semicolon => { + *chars = s; + Some(Clause::None) + } + Token::LeftBrace => Some(Clause::Block(parse_block_clauses(chars)?)), + Token::Output => Some(parse_output_clause(chars)?), + Token::Input => Some(parse_input_clause(chars)?), + Token::If => Some(parse_if_else_clause(chars)?), + Token::While => Some(parse_while_clause(chars)?), + Token::Fn => Some(parse_function_definition_clause(chars)?), Token::Struct => { let Token::Name(_) = next_token(&mut s)? else { // TODO: add source snippet r_panic!("Expected identifier after `struct` keyword."); }; match next_token(&mut s)? { - Token::LeftBrace => parse_struct_definition_clause(chars)?, - _ => parse_let_clause(chars)?, + Token::LeftBrace => Some(parse_struct_definition_clause(chars)?), + _ => Some(parse_let_clause(chars)?), } } - Token::Cell => parse_let_clause(chars)?, + Token::Cell => Some(parse_let_clause(chars)?), Token::Name(_) => match next_token(&mut s)? { Token::LeftParenthesis => todo!(), - _ => parse_assign_clause(chars)?, + _ => Some(parse_assign_clause(chars)?), }, - Token::Drain | Token::Copy => parse_drain_copy_clause(chars)?, + Token::Drain | Token::Copy => Some(parse_drain_copy_clause(chars)?), token => r_panic!("Invalid starting token `{token}`."), }) } @@ -77,10 +77,9 @@ fn parse_block_clauses( break; } } - let clause = parse_clause(chars)?; - if let Clause::None = clause { - break; - } + let Some(clause) = parse_clause(chars)? else { + r_panic!("Expected clause in code block. 
This should not occur."); + }; clauses.push(clause); } @@ -232,15 +231,17 @@ pub fn parse_var_target(chars: &mut &[char]) -> Result { }; let mut ref_chain = vec![]; - let mut s = *chars; loop { + let mut s = *chars; match next_token(&mut s)? { Token::LeftSquareBracket => { + s = *chars; let index = parse_subscript(chars)?; ref_chain.push(Reference::Index(index)); } Token::Dot => { - let Token::Name(subfield_name) = next_token(&mut s)? else { + *chars = s; + let Token::Name(subfield_name) = next_token(chars)? else { // TODO: add source snippet r_panic!("Expected subfield name in variable target identifier."); }; @@ -248,7 +249,6 @@ pub fn parse_var_target(chars: &mut &[char]) -> Result { } _ => break, } - *chars = s; } Ok(VariableTarget { @@ -543,8 +543,8 @@ fn parse_assign_clause(chars: &mut &[char]) -> Result, String "Expected semicolon at end of {} clause.", match operator { Token::EqualsSign => "assignment", - Token::PlusEquals => "add-assignment", - Token::MinusEquals => "subtract-assignment", + Token::PlusEquals => "addition-assignment", + Token::MinusEquals => "subtraction-assignment", _ => unreachable!(), } ); diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs index 4d79bcc..e01ceb2 100644 --- a/compiler/src/parser/tests.rs +++ b/compiler/src/parser/tests.rs @@ -42,22 +42,6 @@ mod parser_tests { #[test] fn end_tokens_2() { - assert_eq!( - parse_program::(";").unwrap_err(), - "Invalid starting token `;`." - ); - assert_eq!( - parse_program::(";;").unwrap_err(), - "Invalid starting token `;`." - ); - assert_eq!( - parse_program::(";;;").unwrap_err(), - "Invalid starting token `;`." - ); - } - - #[test] - fn end_tokens_3() { assert_eq!( parse_program::("cell;").unwrap_err(), "Expected name in variable definition." 
@@ -573,9 +557,51 @@ struct nonsense[39] arr @-56 = 56 - ( 4+3+( -7-5 +(6)-(((( (0) )))) ) ); ); } + #[test] + fn empty_clauses_1() { + _parser_test(";", &[Clause::None]); + } + + #[test] + fn empty_clauses_1a() { + _parser_test("; ", &[Clause::None]); + _parser_test(";\n", &[Clause::None]); + _parser_test("\n;\n;\n", &[Clause::None, Clause::None]); + } + + #[test] + fn empty_clauses_2() { + _parser_test( + " ; ;{;output 3 ; ;} ; ; ", + &[ + Clause::None, + Clause::None, + Clause::Block(vec![ + Clause::None, + Clause::Output { + value: Expression::NaturalNumber(3), + }, + Clause::None, + ]), + Clause::None, + Clause::None, + ], + ); + } + #[test] fn blocks_1() { _parser_test("{}", &[Clause::Block(vec![])]); + _parser_test( + ";;{;;};;", + &[ + Clause::None, + Clause::None, + Clause::Block(vec![Clause::None, Clause::None]), + Clause::None, + Clause::None, + ], + ); } #[test] diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index 95ffb7b..7058927 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -95,7 +95,7 @@ pub mod black_box_tests { const TESTING_BVM_MAX_STEPS: usize = 100_000_000; fn compile_and_run<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( - program: &str, + raw_program: &str, input: &str, ) -> Result where @@ -104,7 +104,8 @@ pub mod black_box_tests { Vec: BrainfuckProgram, { let ctx = MastermindContext { config: OPT_NONE }; - let clauses = parse_program::(program)?; + + let clauses = parse_program::(raw_program)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_program = ctx.ir_to_bf(instructions, None)?; let bfs = bf_program.to_string(); @@ -114,7 +115,7 @@ pub mod black_box_tests { } fn compile_program<'a, TC: 'static + TapeCellVariant, OC: 'static + OpcodeVariant>( - program: &str, + raw_program: &str, config: Option, ) -> Result where @@ -125,7 +126,7 @@ pub mod black_box_tests { let ctx = MastermindContext { config: config.unwrap_or(OPT_NONE), }; - let clauses = 
parse_program::(program)?; + let clauses = parse_program::(raw_program)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; @@ -137,6 +138,11 @@ pub mod black_box_tests { assert_eq!(compile_and_run::("", "").unwrap(), ""); } + #[test] + fn empty_program_1a() { + assert_eq!(compile_and_run::(";;;", "").unwrap(), ""); + } + #[test] fn empty_program_2() { assert_eq!(compile_and_run::("{}", "").unwrap(), ""); @@ -145,13 +151,21 @@ pub mod black_box_tests { #[test] fn empty_program_2a() { assert_eq!( - compile_and_run::("{{{{}}}}", "").unwrap(), + compile_and_run::("{;;};", "").unwrap(), "" ); } #[test] fn empty_program_2b() { + assert_eq!( + compile_and_run::("{{{{}}}}", "").unwrap(), + "" + ); + } + + #[test] + fn empty_program_2c() { assert_eq!( compile_and_run::( "{{}} {} {{{}{}}} {{{ { }{ }} {{ }{ }}} {{{ }{}}{{} {}}}}", @@ -163,17 +177,26 @@ pub mod black_box_tests { } #[test] - fn empty_program_3() { + fn empty_program_2d() { assert_eq!( - compile_and_run::(";", "").unwrap_err(), + compile_and_run::( + "{{}} {} {{{}{}}} {{{ { }{ ;}}; {{ }{ }};} {{{; }{;};}{;{;};; {};}}}", + "" + ) + .unwrap(), "" ); } + #[test] + fn empty_program_3() { + assert_eq!(compile_and_run::(";", "").unwrap(), ""); + } + #[test] fn empty_program_3a() { assert_eq!( - compile_and_run::(";;;;;;", "").unwrap_err(), + compile_and_run::(";;;;;;", "").unwrap(), "" ); } @@ -181,7 +204,7 @@ pub mod black_box_tests { #[test] fn empty_program_3b() { assert_eq!( - compile_and_run::(";;{;{;};};;;", "").unwrap_err(), + compile_and_run::(";;{;{;};};;;", "").unwrap(), "" ); } @@ -1605,7 +1628,6 @@ output '\n'; } #[test] - #[should_panic] fn structs_4d() { let program = r#" struct AA a; @@ -1619,9 +1641,10 @@ struct AA { output a.reds[4]; output '\n'; "#; - let output = compile_and_run::(program, "0123a").expect(""); - println!("{output}"); - assert_eq!(output, "a\n"); + assert_eq!( + compile_and_run::(program, "0123a").unwrap_err(), 
+ "" + ); } #[test] From 6776b624737f2ff35e2b8ed926d98005287f60a3 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Tue, 11 Nov 2025 13:00:13 +1100 Subject: [PATCH 43/56] Add comment stripping, assertion parsing --- compiler/src/lib.rs | 7 +- compiler/src/main.rs | 8 +- compiler/src/parser/old_parser.rs | 241 ------------------------------ compiler/src/parser/parser.rs | 39 ++++- compiler/src/preprocessor.rs | 58 +++++++ compiler/src/tests.rs | 8 +- 6 files changed, 110 insertions(+), 251 deletions(-) diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 11ea637..66dfd01 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -20,7 +20,7 @@ use crate::{ brainfuck::{BrainfuckConfig, BrainfuckContext}, misc::MastermindContext, parser::parser::parse_program, - preprocessor::preprocess_from_memory, + preprocessor::{preprocess_from_memory, strip_comments}, }; // stdlib dependencies: @@ -51,13 +51,14 @@ pub fn wasm_compile( }; let preprocessed_file = preprocess_from_memory(&file_contents, entry_file_name)?; + let stripped_file = strip_comments(&preprocessed_file); if ctx.config.enable_2d_grid { - let parsed_syntax = parse_program::(&preprocessed_file)?; + let parsed_syntax = parse_program::(&stripped_file)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; Ok(bf_code.to_string()) } else { - let parsed_syntax = parse_program::(&preprocessed_file)?; + let parsed_syntax = parse_program::(&stripped_file)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; Ok(bf_code.to_string()) diff --git a/compiler/src/main.rs b/compiler/src/main.rs index a7ddcdb..3169326 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -20,7 +20,7 @@ use crate::{ brainfuck::{BrainfuckConfig, BrainfuckContext}, misc::{MastermindConfig, MastermindContext}, parser::parser::parse_program, - preprocessor::preprocess, + 
preprocessor::{preprocess, strip_comments}, }; // stdlib dependencies: @@ -71,6 +71,7 @@ struct Arguments { } fn main() -> Result<(), String> { + // TODO: clean up this crazy file, this was the first ever rust I wrote and it's messy std::env::set_var("RUST_BACKTRACE", "1"); let args = Arguments::parse(); @@ -92,14 +93,15 @@ fn main() -> Result<(), String> { let bf_program = match args.compile { true => { + let stripped_program = strip_comments(&program); // compile the provided file if ctx.config.enable_2d_grid { - let parsed_syntax = parse_program::(&program)?; + let parsed_syntax = parse_program::(&stripped_program)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; bf_code.to_string() } else { - let parsed_syntax = parse_program::(&program)?; + let parsed_syntax = parse_program::(&stripped_program)?; let instructions = ctx.create_ir_scope(&parsed_syntax, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; bf_code.to_string() diff --git a/compiler/src/parser/old_parser.rs b/compiler/src/parser/old_parser.rs index 9abb8ca..c7494a5 100644 --- a/compiler/src/parser/old_parser.rs +++ b/compiler/src/parser/old_parser.rs @@ -8,247 +8,6 @@ use crate::{ // stdlib dependencies use std::{fmt::Display, mem::discriminant, num::Wrapping}; -/// recursive function to create a tree representation of the program -pub fn parse_clause_from_tokens( - tokens: &[Token], - blocks: Vec>>, -) -> Result>, String> { - Ok(match (&tokens[0], &tokens.get(1), &tokens.get(2)) { - (Token::Cell, _, _) - | (Token::Struct, Some(Token::Name(_)), Some(Token::Name(_) | Token::OpenSquareBracket)) => { - Some(parse_let_clause(tokens)?) - } - (Token::Struct, Some(Token::Name(_)), Some(Token::OpenBrace)) => { - Some(parse_struct_clause(tokens)?) - } - (Token::Plus, Some(Token::Plus), _) | (Token::Minus, Some(Token::Minus), _) => { - Some(parse_increment_clause(tokens)?) 
- } - (Token::Name(_), Some(Token::EqualsSign | Token::Dot | Token::OpenSquareBracket), _) => { - Some(parse_set_clause(clause_tokens)?) - } - (Token::Drain, _, _) => Some(parse_drain_copy_clause( - tokens, - true, - blocks - .get(0) - .ok_or(format!("Expected code block in drain clause: {tokens:#?}"))?, - )?), - (Token::Copy, _, _) => { - clauses.push(parse_drain_copy_clause(clause_tokens, false)?); - } - (Token::While, _, _) => { - clauses.push(parse_while_clause(clause_tokens)?); - } - (Token::Output, _, _) => { - clauses.push(parse_output_clause(clause_tokens)?); - } - (Token::Input, _, _) => { - clauses.push(parse_input_clause(clause_tokens)?); - } - (Token::Name(_), Some(Token::OpenParenthesis), _) => { - clauses.push(parse_function_call_clause(clause_tokens)?); - } - (Token::Fn, _, _) => { - clauses.push(parse_function_definition_clause(clause_tokens)?); - } - (Token::Name(_), Token::Plus | Token::Minus, Token::EqualsSign) => { - clauses.extend(parse_add_clause(clause_tokens)?); - } - (Token::If, _, _) => { - clauses.push(parse_if_else_clause(clause_tokens)?); - } - (Token::OpenBrace, _, _) => { - let braced_tokens = get_braced_tokens(clause_tokens, BRACES)?; - let inner_clauses = parse(braced_tokens)?; - clauses.push(Clause::Block(inner_clauses)); - } - (Token::Assert, _, _) => Some(parse_assert_clause(tokens)?), - // empty clause - (Token::Semicolon, _, _) => None, - // the None token usually represents whitespace, it should be filtered out before reaching this function - // Wrote out all of these possibilities so that the compiler will tell me when I haven't implemented a token - ( - Token::Else - | Token::Not - | Token::ClosingBrace - | Token::OpenSquareBracket - | Token::ClosingSquareBracket - | Token::OpenParenthesis - | Token::ClosingParenthesis - | Token::Comma - | Token::Plus - | Token::Minus - | Token::Into - | Token::Digits(_) - | Token::Name(_) - | Token::String(_) - | Token::Character(_) - | Token::True - | Token::False - | Token::EqualsSign - | 
Token::Asterisk - | Token::Clobbers - | Token::Equals - | Token::Unknown - | Token::Dot - | Token::At - | Token::Struct, - _, - _, - ) => r_panic!("Invalid clause: {tokens:#?}"), - }) -} - -// currently just syntax sugar, should make it actually do post/pre increments -fn parse_increment_clause(clause: &[Token]) -> Result, String> { - let (var, _) = parse_var_target(&clause[2..])?; - //An increment clause can never be self referencing since it just VAR++ - Ok(match (&clause[0], &clause[1]) { - (Token::Plus, Token::Plus) => Clause::AddToVariable { - var, - value: Expression::NaturalNumber(1), - self_referencing: false, - }, - (Token::Minus, Token::Minus) => Clause::AddToVariable { - var, - value: Expression::NaturalNumber((-1i8 as u8) as usize), - self_referencing: false, - }, - _ => { - r_panic!("Invalid pattern in increment clause: {clause:#?}"); - } - }) - // assumed that the final token is a semicolon -} - -fn parse_set_clause(clause: &[Token]) -> Result, String> { - let mut i = 0usize; - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - Ok(match &clause[i] { - Token::EqualsSign => { - i += 1; - let expr = Expression::parse(&clause[i..(clause.len() - 1)])?; - let self_referencing = expr.check_self_referencing(&var); - Clause::SetVariable { - var, - value: expr, - self_referencing, - } - } - Token::Plus | Token::Minus => { - let is_add = if let Token::Plus = &clause[i] { - true - } else { - false - }; - i += 1; - let Token::EqualsSign = &clause[i] else { - r_panic!("Expected equals sign in add-assign operator: {clause:#?}"); - }; - i += 1; - - let mut expr = Expression::parse(&clause[i..(clause.len() - 1)])?; - if !is_add { - expr = expr.flipped_sign()?; - } - - let self_referencing = expr.check_self_referencing(&var); - Clause::AddToVariable { - var, - value: expr, - self_referencing, - } - } - _ => r_panic!("Expected assignment operator in set clause: {clause:#?}"), - }) -} - -fn parse_if_else_clause( - clause: &[Token], -) -> Result, String> { 
- // skip first token, assumed to start with if - let mut i = 1usize; - let mut not = false; - if let Token::Not = &clause[i] { - not = true; - i += 1; - } - - let condition_start_token = i; - - i += 1; - while let Some(token) = clause.get(i) { - if let Token::OpenBrace = token { - break; - } - i += 1; - } - r_assert!( - i < clause.len(), - "Expected condition and block in if statement: {clause:#?}" - ); - - let condition = Expression::parse(&clause[condition_start_token..i])?; - - let block_one = { - let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - i += 2 + block_tokens.len(); - parse(block_tokens)? - }; - - let block_two = if let Some(Token::Else) = &clause.get(i) { - i += 1; - let block_tokens = get_braced_tokens(&clause[i..], BRACES)?; - // i += 2 + block_tokens.len(); - Some(parse(block_tokens)?) - } else { - None - }; - - Ok(match (not, block_one, block_two) { - (false, block_one, block_two) => Clause::IfElse { - condition, - if_block: Some(block_one), - else_block: block_two, - }, - (true, block_one, block_two) => Clause::IfElse { - condition, - if_block: block_two, - else_block: Some(block_one), - }, - }) -} - -fn parse_output_clause(clause: &[Token]) -> Result, String> { - let mut i = 1usize; - - let expr_tokens = &clause[i..(clause.len() - 1)]; - let expr = Expression::parse(expr_tokens)?; - i += expr_tokens.len(); - - let Token::Semicolon = &clause[i] else { - r_panic!("Invalid token at end of output clause: {clause:#?}"); - }; - - Ok(Clause::OutputValue { value: expr }) -} - -fn parse_input_clause(clause: &[Token]) -> Result, String> { - let mut i = 1usize; - - let (var, len) = parse_var_target(&clause[i..])?; - i += len; - - let Token::Semicolon = &clause[i] else { - r_panic!("Invalid token at end of input clause: {clause:#?}"); - }; - - Ok(Clause::InputVariable { var }) -} - fn parse_assert_clause(clause: &[Token]) -> Result, String> { let mut i = 1usize; diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 
72ccdff..6811653 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -29,6 +29,7 @@ fn parse_clause( chars: &mut &[char], ) -> Result>, String> { let mut s = *chars; + // TODO: decide whether comments should be handled in the parser or not? Ok(match next_token(&mut s)? { Token::None => None, Token::Semicolon => { @@ -41,6 +42,7 @@ fn parse_clause( Token::If => Some(parse_if_else_clause(chars)?), Token::While => Some(parse_while_clause(chars)?), Token::Fn => Some(parse_function_definition_clause(chars)?), + Token::Assert => Some(parse_assert_clause(chars)?), Token::Struct => { let Token::Name(_) = next_token(&mut s)? else { // TODO: add source snippet @@ -57,6 +59,22 @@ fn parse_clause( _ => Some(parse_assign_clause(chars)?), }, Token::Drain | Token::Copy => Some(parse_drain_copy_clause(chars)?), + Token::PlusPlus => { + *chars = s; + Some(Clause::AddAssign { + var: parse_var_target(chars)?, + value: Expression::NaturalNumber(1), + self_referencing: false, + }) + } + Token::MinusMinus => { + *chars = s; + Some(Clause::AddAssign { + var: parse_var_target(chars)?, + value: Expression::NaturalNumber((-1i8 as u8) as usize), + self_referencing: false, + }) + } token => r_panic!("Invalid starting token `{token}`."), }) } @@ -235,7 +253,6 @@ pub fn parse_var_target(chars: &mut &[char]) -> Result { let mut s = *chars; match next_token(&mut s)? { Token::LeftSquareBracket => { - s = *chars; let index = parse_subscript(chars)?; ref_chain.push(Reference::Index(index)); } @@ -622,3 +639,23 @@ fn parse_drain_copy_clause( is_copying, }) } + +fn parse_assert_clause(chars: &mut &[char]) -> Result, String> { + let Token::Assert = next_token(chars)? else { + r_panic!("Expected `assert` in assert clause."); + }; + + let var = parse_var_target(chars)?; + + let value = match next_token(chars)? 
{ + Token::Unknown => None, + Token::Equals => Some(Expression::parse(chars)?), + token => r_panic!("Unexpected `{token}` in assert clause."), + }; + + let Token::Semicolon = next_token(chars)? else { + r_panic!("Expected semicolon at end of assert clause."); + }; + + Ok(Clause::AssertVariableValue { var, value }) +} diff --git a/compiler/src/preprocessor.rs b/compiler/src/preprocessor.rs index 98542b7..bcf9ea8 100644 --- a/compiler/src/preprocessor.rs +++ b/compiler/src/preprocessor.rs @@ -1,8 +1,12 @@ // take in a file, read includes and simple conditionals and output a file with those includes pasted in // C-style +// TODO: add tests for this! + use std::{collections::HashMap, path::PathBuf}; +use itertools::Itertools; + use crate::macros::macros::r_assert; pub fn preprocess(file_path: PathBuf) -> String { @@ -72,3 +76,57 @@ pub fn preprocess_from_memory( Ok(acc) } + +/// strips comments from input program, does not support anything else +pub fn strip_comments(raw_program: &str) -> String { + let mut stripped = raw_program + .lines() + .map(|line| line.split_once("//").map_or_else(|| line, |(left, _)| left)) + .join("\n"); + // join doesn't add a newline to the end, here we re-add it, this is probably unnecessary + if raw_program.ends_with("\n") { + stripped.push_str("\n"); + } + stripped +} + +#[cfg(test)] +pub mod preprocessor_tests { + use crate::preprocessor::strip_comments; + + #[test] + fn comments_0() { + assert_eq!(strip_comments(""), ""); + assert_eq!(strip_comments("\n\t\t\n"), "\n\t\t\n"); + } + + #[test] + fn comments_1() { + assert_eq!(strip_comments("hi//hello"), "hi"); + } + + #[test] + fn comments_2() { + assert_eq!(strip_comments("h//i // hello"), "h"); + } + + #[test] + fn comments_3() { + assert_eq!( + strip_comments( + r#" +hello // don't talk to me +second line +// third line comment +fourth line +"# + ), + r#" +hello +second line + +fourth line +"# + ); + } +} diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index 7058927..fd584ab 
100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -15,6 +15,7 @@ pub mod black_box_tests { brainfuck::{bvm_tests::run_code, BrainfuckConfig}, misc::{MastermindConfig, MastermindContext}, parser::parser::parse_program, + preprocessor::strip_comments, }; // TODO: run test suite with different optimisations turned on const OPT_NONE: MastermindConfig = MastermindConfig { @@ -104,8 +105,8 @@ pub mod black_box_tests { Vec: BrainfuckProgram, { let ctx = MastermindContext { config: OPT_NONE }; - - let clauses = parse_program::(raw_program)?; + let stripped_program = strip_comments(raw_program); + let clauses = parse_program::(&stripped_program)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_program = ctx.ir_to_bf(instructions, None)?; let bfs = bf_program.to_string(); @@ -126,7 +127,8 @@ pub mod black_box_tests { let ctx = MastermindContext { config: config.unwrap_or(OPT_NONE), }; - let clauses = parse_program::(raw_program)?; + let stripped_program = strip_comments(raw_program); + let clauses = parse_program::(&stripped_program)?; let instructions = ctx.create_ir_scope(&clauses, None)?.build_ir(false); let bf_code = ctx.ir_to_bf(instructions, None)?; From 93f811e4e8f3f4cf820846e47ec1423523870a85 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Tue, 11 Nov 2025 20:13:12 +1100 Subject: [PATCH 44/56] Add function call parsing and change how frontend handles function arguments --- compiler/src/frontend.rs | 340 +++++++++++++++++++++++++++++----- compiler/src/parser/parser.rs | 40 +++- compiler/src/parser/types.rs | 2 +- 3 files changed, 333 insertions(+), 49 deletions(-) diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index 989df7b..27fe4f9 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -596,32 +596,36 @@ impl MastermindContext { } => { // create variable translations and recursively compile the inner variable block - let calling_argument_types = arguments + // get the calling 
arguments' types + let calling_argument_types: Vec = arguments .iter() - .map(|a| scope.get_target_type(&a)) - .collect::, _>>()?; + .map(|arg| scope.get_expression_type(arg)) + .collect::, String>>()?; + // find the function based on name * types let function_definition = scope.get_function(&function_name, &calling_argument_types)?; + // create mappings in a new translation scope, so mappings will be removed once scope closes let mut argument_translation_scope = scope.open_inner(); - - // TODO: refactor this mess - // deal with argument memory mappings: - for ((calling_argument, calling_argument_type), (arg_name, expected_type)) in - zip( - zip(arguments, calling_argument_types), - function_definition.arguments.iter(), - ) { - // TODO: fix this duplicate call, get_target_type() internally gets the memory allocation details - // then these are gotten again in create_mapped_variable() - r_assert!(calling_argument_type == expected_type, "Expected argument of type \"{expected_type:#?}\" in function call \"{function_name}\", received argument of type \"{calling_argument_type:#?}\". This should not occur"); - // register an argument translation in the scope + assert_eq!(arguments.len(), function_definition.arguments.len()); + for (calling_expr, (arg_name, _)) in + zip(arguments, function_definition.arguments) + { + // TODO: allow expressions as arguments: create a new variable instead of mapping when a value needs to be computed + let calling_arg = match calling_expr { + Expression::VariableReference(var) => var, + expr => r_panic!( + "Expected variable target in function call argument, \ +found expression `{expr}`. General expressions as \ +function arguments are not supported." 
+ ), + }; argument_translation_scope - .create_mapped_variable(arg_name.clone(), &calling_argument)?; + .create_mapped_variable(arg_name, &calling_arg)?; } - // recurse + // recursively compile the function block let function_scope = self.create_ir_scope( &function_definition.block, Some(&argument_translation_scope), @@ -630,8 +634,8 @@ impl MastermindContext { .instructions .extend(function_scope.build_ir(true)); - // extend the inner scope instructions onto the outer scope - // maybe function call compiling should be its own function? + // add the recursively compiled instructions to the current scope's built instructions + // TODO: figure out why this .build_ir() call uses clean_up_variables = false scope .instructions .extend(argument_translation_scope.build_ir(false)); @@ -891,6 +895,34 @@ impl ValueType { } } +impl Display for ValueType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ValueType::Cell => { + f.write_str("cell")?; + } + ValueType::Array(length, element_type) => { + f.write_fmt(format_args!("{element_type}[{length}]"))?; + } + ValueType::DictStruct(fields) => { + f.write_str("{")?; + let fields_len = fields.len(); + for (i, (field_name, field_type, offset)) in fields.iter().enumerate() { + f.write_fmt(format_args!("{field_type} {field_name}"))?; + if let Some(offset) = offset { + f.write_fmt(format_args!(" @{offset}"))?; + } + f.write_str(";")?; + if i < (fields_len - 1) { + f.write_str(" ")?; + } + } + } + } + Ok(()) + } +} + impl ScopeBuilder<'_, TC, OC> where TC: Display + Clone, @@ -908,6 +940,7 @@ where } } + // regarding `clean_up_variables`: // I don't love this system of deciding what to clean up at the end in this specific function, but I'm not sure what the best way to achieve this would be // this used to be called "get_instructions" but I think this more implies things are being modified pub fn build_ir(mut self, clean_up_variables: bool) -> Vec> { @@ -1050,36 +1083,38 @@ where } } + /// find 
a function definition based on name and argument types (unaffected by the self.fn_only flag) fn get_function( &self, calling_name: &str, - calling_arg_types: &Vec<&ValueType>, + calling_arg_types: &Vec, ) -> Result, String> { - // this function is unaffected by the self.fn_only flag - Ok( - if let Some(func) = self.functions.iter().find(|(name, args, _)| { - if name != calling_name || args.len() != calling_arg_types.len() { + if let Some(func) = self.functions.iter().find(|(name, args, _)| { + if name != calling_name || args.len() != calling_arg_types.len() { + return false; + } + for ((_, arg_type), calling_arg_type) in zip(args, calling_arg_types) { + if *arg_type != *calling_arg_type { return false; } - for ((_, arg_type), calling_arg_type) in zip(args, calling_arg_types) { - if *arg_type != **calling_arg_type { - return false; - } - } - true - }) { - // TODO: stop cloning! This function overload stuff is tacked on and needs refactoring - let (_, arguments, block) = func; - Function { - arguments: arguments.clone(), - block: block.clone(), - } - } else if let Some(outer_scope) = self.outer_scope { - outer_scope.get_function(calling_name, calling_arg_types)? - } else { - r_panic!("Could not find function \"{calling_name}\" with correct arguments in current scope"); - }, - ) + } + true + }) { + // TODO: stop cloning! 
This function overload stuff is tacked on and needs refactoring + let (_, arguments, block) = func; + return Ok(Function { + arguments: arguments.clone(), + block: block.clone(), + }); + } + + if let Some(outer_scope) = self.outer_scope { + return outer_scope.get_function(calling_name, calling_arg_types); + } + + r_panic!( + "Could not find function \"{calling_name}\" with correct arguments in current scope" + ); } /// Define a struct in this scope @@ -1116,7 +1151,7 @@ where new_arguments: Vec>, new_block: Vec>, ) -> Result<(), String> { - let absolute_arguments = new_arguments + let absolute_arguments: Vec<(String, ValueType)> = new_arguments .into_iter() .map(|f| { let LocationSpecifier::None = f.location_specifier else { @@ -1124,8 +1159,9 @@ where }; Ok((f.name, self.create_absolute_type(&f.var_type)?)) }) - .collect::, _>>()?; + .collect::, String>>()?; + // TODO: refactor this: // This is some fucked C-style loop break logic, basically GOTOs // basically it only gets to the panic if the functions have identical signature (except argument names) 'func_loop: for (name, args, _) in self.functions.iter() { @@ -1436,7 +1472,8 @@ where }) } - /// Create memory mapping between a pre-existing variable and a new one, used for function arguments + /// Create memory mapping between a pre-existing variable and a new one, used for function arguments. + /// This could be used for copy by reference of subfields in future. fn create_mapped_variable( &mut self, mapped_var_name: String, @@ -1531,6 +1568,56 @@ mapping: {mapped_var_name} -> {target}" Ok(()) } + /// Get the final type of an expression. 
+ /// (technically unnecessary right now, but can be used to implement expressions as function arguments in future) + fn get_expression_type(&self, expr: &Expression) -> Result { + Ok(match expr { + Expression::NaturalNumber(_) => ValueType::Cell, + Expression::SumExpression { sign: _, summands } => { + let Some(_) = summands.first() else { + r_panic!( + "Cannot infer expression type because sum \ +expression has no elements: `{expr}`." + ); + }; + // TODO: decide if the summands' types should be verified here or not + for summand in summands { + match self.get_expression_type(summand)? { + ValueType::Cell => (), + summand_type => { + r_panic!( + "Sum expressions must be comprised of cell-types: \ +found `{summand_type}` in `{expr}`" + ); + } + }; + } + ValueType::Cell + } + Expression::VariableReference(var) => self.get_target_type(var)?.clone(), + Expression::ArrayLiteral(elements) => { + let mut elements_iter = elements.iter(); + let Some(first_element) = elements_iter.next() else { + r_panic!( + "Cannot infer expression type because \ +array literal has no elements: `{expr}`." 
+ ); + }; + let first_element_type = self.get_expression_type(first_element)?; + for element in elements_iter { + let element_type = self.get_expression_type(element)?; + r_assert!( + element_type == first_element_type, + "All elements in array expressions must have the \ +same type: found `{element_type}` in `{expr}`" + ); + } + ValueType::Array(elements.len(), Box::new(first_element_type)) + } + Expression::StringLiteral(s) => ValueType::Array(s.len(), Box::new(ValueType::Cell)), + }) + } + /// helper function for a common use-case: /// flatten an expression and add it to a specific cell (using copies and adds, etc) fn _add_expr_to_cell(&mut self, expr: &Expression, cell: CellReference) -> Result<(), String> { @@ -1649,3 +1736,164 @@ mapping: {mapped_var_name} -> {target}" self.push_instruction(Instruction::Free(temp_mem_id)); } } + +// TODO: think about where to put these tests, and by extension where to put the scopebuilder +#[cfg(test)] +mod scope_builder_tests { + use crate::{ + backend::bf::{Opcode, TapeCell}, + parser::expressions::Sign, + }; + + use super::*; + + #[test] + fn variable_allocation_1() { + let mut scope = ScopeBuilder::::new(); + let allocated_type = scope.allocate_variable(VariableTypeDefinition { + name: String::from("var"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }); + assert_eq!(allocated_type, Ok(&ValueType::Cell)); + } + + #[test] + fn get_expression_type_numbers_1() { + let scope = ScopeBuilder::::new(); + assert_eq!( + scope + .get_expression_type(&Expression::NaturalNumber(0)) + .unwrap(), + ValueType::Cell + ); + assert_eq!( + scope + .get_expression_type(&Expression::NaturalNumber(1)) + .unwrap(), + ValueType::Cell + ); + assert_eq!( + scope + .get_expression_type(&Expression::NaturalNumber(345678)) + .unwrap(), + ValueType::Cell + ); + } + + #[test] + fn get_expression_type_sums_1() { + let scope = ScopeBuilder::::new(); + assert_eq!( + scope + 
.get_expression_type(&Expression::SumExpression { + sign: Sign::Positive, + summands: vec![Expression::NaturalNumber(0)] + }) + .unwrap(), + ValueType::Cell + ); + assert_eq!( + scope + .get_expression_type(&Expression::SumExpression { + sign: Sign::Negative, + summands: vec![ + Expression::NaturalNumber(345678), + Expression::NaturalNumber(2) + ] + }) + .unwrap(), + ValueType::Cell + ); + assert_eq!( + scope + .get_expression_type(&Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::SumExpression { + sign: Sign::Negative, + summands: vec![ + Expression::NaturalNumber(1), + Expression::NaturalNumber(2) + ] + }, + Expression::NaturalNumber(2) + ] + }) + .unwrap(), + ValueType::Cell + ); + } + + #[test] + fn get_expression_type_variables_1() { + let mut scope = ScopeBuilder::::new(); + scope + .allocate_variable(VariableTypeDefinition { + name: String::from("var"), + var_type: VariableTypeReference::Cell, + location_specifier: LocationSpecifier::None, + }) + .unwrap(); + assert_eq!( + scope + .get_expression_type(&Expression::VariableReference(VariableTarget { + name: String::from("var"), + subfields: None, + is_spread: false + })) + .unwrap(), + ValueType::Cell + ); + assert_eq!( + scope + .get_expression_type(&Expression::SumExpression { + sign: Sign::Positive, + summands: vec![ + Expression::VariableReference(VariableTarget { + name: String::from("var"), + subfields: None, + is_spread: false + }), + Expression::NaturalNumber(123) + ] + }) + .unwrap(), + ValueType::Cell + ); + } + + #[test] + fn get_expression_type_arrays_1() { + let mut scope = ScopeBuilder::::new(); + scope + .allocate_variable(VariableTypeDefinition { + name: String::from("arr"), + var_type: VariableTypeReference::Array(Box::new(VariableTypeReference::Cell), 3), + location_specifier: LocationSpecifier::None, + }) + .unwrap(); + assert_eq!( + scope + .get_expression_type(&Expression::VariableReference(VariableTarget { + name: String::from("arr"), + subfields: None, 
+ is_spread: false + })) + .unwrap(), + ValueType::Array(3, Box::new(ValueType::Cell)) + ); + assert_eq!( + scope + .get_expression_type(&Expression::VariableReference(VariableTarget { + name: String::from("arr"), + subfields: Some(VariableTargetReferenceChain(vec![Reference::Index(0)])), + is_spread: false + })) + .unwrap(), + ValueType::Cell + ); + } + + // TODO: make failure tests for expression types +} diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index 6811653..e70d3cc 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -55,7 +55,7 @@ fn parse_clause( } Token::Cell => Some(parse_let_clause(chars)?), Token::Name(_) => match next_token(&mut s)? { - Token::LeftParenthesis => todo!(), + Token::LeftParenthesis => Some(parse_function_call_clause(chars)?), _ => Some(parse_assign_clause(chars)?), }, Token::Drain | Token::Copy => Some(parse_drain_copy_clause(chars)?), @@ -445,7 +445,7 @@ fn parse_function_definition_clause( Token::RightParenthesis => break, Token::Comma => (), // TODO: add source snippet - _ => r_panic!("Unexpected token in function argument list."), + _ => r_panic!("Unexpected token in function definition arguments."), } } @@ -456,6 +456,42 @@ fn parse_function_definition_clause( }) } +fn parse_function_call_clause(chars: &mut &[char]) -> Result, String> { + let Token::Name(function_name) = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected function name in function call clause."); + }; + + let Token::LeftParenthesis = next_token(chars)? else { + // TODO: add source snippet + r_panic!("Expected `(` in function call clause."); + }; + + let mut arguments = vec![]; + loop { + { + let mut s = *chars; + if let Token::RightParenthesis = next_token(&mut s)? { + *chars = s; + break; + } + } + arguments.push(Expression::parse(chars)?); + + match next_token(chars)? 
{ + Token::RightParenthesis => break, + Token::Comma => (), + // TODO: add source snippet + _ => r_panic!("Unexpected token in function call arguments."), + } + } + + Ok(Clause::CallFunction { + function_name, + arguments, + }) +} + /// Parse tokens representing a struct definition into a clause fn parse_struct_definition_clause( chars: &mut &[char], diff --git a/compiler/src/parser/types.rs b/compiler/src/parser/types.rs index 89694d7..2fd299e 100644 --- a/compiler/src/parser/types.rs +++ b/compiler/src/parser/types.rs @@ -62,7 +62,7 @@ pub enum Clause { }, CallFunction { function_name: String, - arguments: Vec, + arguments: Vec, }, If { condition: Expression, From e4c362e86667b5c0d7c7fa61147e52373911126b Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Tue, 11 Nov 2025 20:18:28 +1100 Subject: [PATCH 45/56] Fix issue with struct definition parsing --- compiler/src/parser/parser.rs | 12 ++++++++---- compiler/src/tests.rs | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index e70d3cc..b67187b 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -186,9 +186,9 @@ fn parse_var_type_definition( VariableTypeReference::Struct(struct_name) } - _ => { + token => { // TODO: add source snippet - r_panic!("Unexpected token in variable type definition."); + r_panic!("Unexpected `{token}` found in variable type definition."); } }; @@ -519,8 +519,12 @@ fn parse_struct_definition_clause( // TODO: add source snippet r_panic!("Expected semicolon after struct definition field."); }; - if let Token::RightBrace = next_token(chars)? { - break; + { + let mut s = *chars; + if let Token::RightBrace = next_token(&mut s)? 
{ + *chars = s; + break; + } } } diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index fd584ab..1e54057 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -1645,7 +1645,7 @@ output '\n'; "#; assert_eq!( compile_and_run::(program, "0123a").unwrap_err(), - "" + "Index \"[4]\" must be less than array length (4)." ); } From 6f2f236c3359f1991bde2c17275d0b0e9996558a Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Tue, 11 Nov 2025 21:54:43 +1100 Subject: [PATCH 46/56] Fix all tests with new parser --- compiler/src/backend/bf.rs | 24 +++++------ compiler/src/backend/bf2d.rs | 29 +++++++------ compiler/src/backend/common.rs | 2 +- compiler/src/frontend.rs | 2 +- compiler/src/parser/parser.rs | 72 +++++++++++++++++++++++++++++++- compiler/src/parser/tests.rs | 8 ++-- compiler/src/parser/tokeniser.rs | 2 +- compiler/src/parser/types.rs | 2 +- compiler/src/tests.rs | 7 ++-- 9 files changed, 108 insertions(+), 40 deletions(-) diff --git a/compiler/src/backend/bf.rs b/compiler/src/backend/bf.rs index 261a881..18356a7 100644 --- a/compiler/src/backend/bf.rs +++ b/compiler/src/backend/bf.rs @@ -32,18 +32,18 @@ pub enum Opcode { } impl OpcodeVariant for Opcode { - fn from_token(token: &Token) -> Result { - Ok(match token { - Token::Plus => Opcode::Add, - Token::Minus => Opcode::Subtract, - // Token::MoreThan => Opcode::Right, - // Token::LessThan => Opcode::Left, - Token::LeftSquareBracket => Opcode::OpenLoop, - Token::RightSquareBracket => Opcode::CloseLoop, - Token::Dot => Opcode::Output, - Token::Comma => Opcode::Input, - _ => r_panic!("Invalid token in inline Brainfuck: {token:?}"), - }) + fn try_from_char(c: char) -> Option { + match c { + '+' => Some(Opcode::Add), + '-' => Some(Opcode::Subtract), + '>' => Some(Opcode::Right), + '<' => Some(Opcode::Left), + '[' => Some(Opcode::OpenLoop), + ']' => Some(Opcode::CloseLoop), + '.' 
=> Some(Opcode::Output), + ',' => Some(Opcode::Input), + _ => None, + } } } diff --git a/compiler/src/backend/bf2d.rs b/compiler/src/backend/bf2d.rs index b85147f..6d6c8a6 100644 --- a/compiler/src/backend/bf2d.rs +++ b/compiler/src/backend/bf2d.rs @@ -37,21 +37,20 @@ pub enum Opcode2D { } impl OpcodeVariant for Opcode2D { - fn from_token(token: &Token) -> Result { - Ok(match token { - Token::Plus => Opcode2D::Add, - Token::Minus => Opcode2D::Subtract, - // Token::MoreThan => Opcode2D::Right, - // Token::LessThan => Opcode2D::Left, - Token::LeftSquareBracket => Opcode2D::OpenLoop, - Token::RightSquareBracket => Opcode2D::CloseLoop, - Token::Dot => Opcode2D::Output, - Token::Comma => Opcode2D::Input, - // Token::Caret => Opcode2D::Up, - // TODO: implement this: - // Token::Down => Opcode2D::Down, - _ => r_panic!("Invalid token in inline Brainfuck: {token:?}"), - }) + fn try_from_char(c: char) -> Option { + match c { + '+' => Some(Opcode2D::Add), + '-' => Some(Opcode2D::Subtract), + '>' => Some(Opcode2D::Right), + '<' => Some(Opcode2D::Left), + '^' => Some(Opcode2D::Up), + 'v' => Some(Opcode2D::Down), + '[' => Some(Opcode2D::OpenLoop), + ']' => Some(Opcode2D::CloseLoop), + '.' 
=> Some(Opcode2D::Output), + ',' => Some(Opcode2D::Input), + _ => None, + } } } diff --git a/compiler/src/backend/common.rs b/compiler/src/backend/common.rs index cb62b18..b45ffce 100644 --- a/compiler/src/backend/common.rs +++ b/compiler/src/backend/common.rs @@ -414,7 +414,7 @@ pub trait OpcodeVariant where Self: Sized + Clone + Copy, { - fn from_token(token: &Token) -> Result; + fn try_from_char(c: char) -> Option; } pub struct CellAllocatorData { diff --git a/compiler/src/frontend.rs b/compiler/src/frontend.rs index 27fe4f9..374b441 100644 --- a/compiler/src/frontend.rs +++ b/compiler/src/frontend.rs @@ -532,7 +532,7 @@ impl MastermindContext { let new_scope = self.create_ir_scope(&clauses, Some(&scope))?; scope.instructions.extend(new_scope.build_ir(true)); } - Clause::InlineBrainfuck { + Clause::Brainfuck { location_specifier, clobbered_variables, operations, diff --git a/compiler/src/parser/parser.rs b/compiler/src/parser/parser.rs index b67187b..993a3f9 100644 --- a/compiler/src/parser/parser.rs +++ b/compiler/src/parser/parser.rs @@ -2,7 +2,7 @@ use super::{ expressions::Expression, tokeniser::{next_token, Token}, types::{ - Clause, LocationSpecifier, Reference, TapeCellLocation, VariableTarget, + Clause, ExtendedOpcode, LocationSpecifier, Reference, TapeCellLocation, VariableTarget, VariableTargetReferenceChain, VariableTypeReference, }, }; @@ -75,6 +75,7 @@ fn parse_clause( self_referencing: false, }) } + Token::Bf => Some(parse_brainfuck_clause(chars)?), token => r_panic!("Invalid starting token `{token}`."), }) } @@ -699,3 +700,72 @@ fn parse_assert_clause(chars: &mut &[char]) -> Result, String Ok(Clause::AssertVariableValue { var, value }) } + +fn parse_brainfuck_clause( + chars: &mut &[char], +) -> Result, String> { + let Token::Bf = next_token(chars)? 
else { + r_panic!("Expected `bf` in in-line Brainfuck clause."); + }; + + let location_specifier = TC::parse_location_specifier(chars)?; + let mut clobbered_variables = vec![]; + { + let mut s = *chars; + // parse the rare `clobbers` keyword, borrowed from GCC I think? // TODO: look this up + if let Token::Clobbers = next_token(&mut s)? { + *chars = s; + loop { + clobbered_variables.push(parse_var_target(chars)?); + { + let mut s = *chars; + if let Token::LeftBrace = next_token(&mut s)? { + break; + } + } + } + } + } + + let Token::LeftBrace = next_token(chars)? else { + r_panic!("Expected `{{` in in-line Brainfuck clause."); + }; + + // tokenise and parse in-line brainfuck: + // totally different tokenisation to mastermind + let mut operations = vec![]; + loop { + match chars.get(0) { + Some(c) => match OC::try_from_char(*c) { + Some(opcode) => { + *chars = &chars[1..]; + operations.push(ExtendedOpcode::Opcode(opcode)); + } + None => match c { + '{' => { + // recursively parse inner mastermind block + operations.push(ExtendedOpcode::Block(parse_block_clauses(chars)?)); + } + '}' => { + *chars = &chars[1..]; + break; + } + c if c.is_whitespace() => { + *chars = &chars[1..]; + } + c => r_panic!("Unexpected character `{c}` in Brainfuck clause."), + }, + }, + None => { + // TODO: add source snippet + r_panic!("Unexpected end of file in Brainfuck clause."); + } + } + } + + Ok(Clause::Brainfuck { + location_specifier, + clobbered_variables, + operations, + }) +} diff --git a/compiler/src/parser/tests.rs b/compiler/src/parser/tests.rs index e01ceb2..f3d4074 100644 --- a/compiler/src/parser/tests.rs +++ b/compiler/src/parser/tests.rs @@ -139,7 +139,7 @@ mod parser_tests { location_specifier: LocationSpecifier::None, }, }, - Clause::InlineBrainfuck { + Clause::Brainfuck { location_specifier: LocationSpecifier::None, clobbered_variables: vec![], operations: vec![ @@ -170,7 +170,7 @@ mod parser_tests { location_specifier: LocationSpecifier::None, }, }, - 
Clause::InlineBrainfuck { + Clause::Brainfuck { location_specifier: LocationSpecifier::None, clobbered_variables: vec![], operations: vec![ @@ -193,7 +193,7 @@ mod parser_tests { fn inline_bf_3() { _parser_test_2d( "bf {vvvv>}", - &[Clause::InlineBrainfuck { + &[Clause::Brainfuck { location_specifier: LocationSpecifier::None, clobbered_variables: vec![], operations: vec![ @@ -211,7 +211,7 @@ mod parser_tests { fn inline_bf_4() { assert_eq!( parse_program::("bf {vvvv>}").unwrap_err(), - "" + "Unexpected character `v` in Brainfuck clause." ); } diff --git a/compiler/src/parser/tokeniser.rs b/compiler/src/parser/tokeniser.rs index 305d53e..52bbba9 100644 --- a/compiler/src/parser/tokeniser.rs +++ b/compiler/src/parser/tokeniser.rs @@ -217,7 +217,7 @@ fn parse_character_literal(chars: &mut &[char]) -> Result { } Some('\'') => r_panic!("Unexpected `'` in character literal, must be length 1."), Some(c) => *c, - None => r_panic!("Unexpected end of input found while parsing character literal."), + None => r_panic!("Unexpected end of file while parsing character literal."), }; i += 1; let Some('\'') = chars.get(i) else { diff --git a/compiler/src/parser/types.rs b/compiler/src/parser/types.rs index 2fd299e..ceba317 100644 --- a/compiler/src/parser/types.rs +++ b/compiler/src/parser/types.rs @@ -83,7 +83,7 @@ pub enum Clause { else_block: Vec>, }, Block(Vec>), - InlineBrainfuck { + Brainfuck { location_specifier: LocationSpecifier, clobbered_variables: Vec, operations: Vec>, diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index 1e54057..23a99bf 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -2539,8 +2539,7 @@ bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvstvv.+++.------.vv-.^^^ "#; assert_eq!( compile_program::(program, None).unwrap_err(), - // TODO: make sure this works correctly after refactoring tokeniser - "" + "Unexpected character `s` in Brainfuck clause." 
); } @@ -2553,8 +2552,8 @@ bf {,.[-]+[--^-[^^+^-----vv]v--v---]^-.^^^+.^^..+++[.^]vvstvv.+++.------.vv-.^^^ "~", None, ) - .unwrap(), - "~Hello, World!" + .unwrap_err(), + "2D Brainfuck currently disabled" ); } From a2f76a7745274943137d55f6c46a05e24ca624a1 Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Wed, 12 Nov 2025 00:16:25 +1100 Subject: [PATCH 47/56] Update variables, conditionals, and rewrite loops docos --- docs/conditionals.md | 4 +- docs/loops.md | 119 ++++++++++++++++++++++++------------------- docs/variables.md | 11 ++-- 3 files changed, 74 insertions(+), 60 deletions(-) diff --git a/docs/conditionals.md b/docs/conditionals.md index 664d54b..5a93544 100644 --- a/docs/conditionals.md +++ b/docs/conditionals.md @@ -1,6 +1,4 @@ -### Conditionals - -Mastermind supports basic `if`/`else` statements. An `if` statement takes in a single cell expression, if the expression is evaluated to be truthy, then the `if` block is executed, otherwise the optional `else` block is executed. This behaviour can be inverted using the `not` keyword. +Mastermind supports basic `if`/`else` statements. An `if` statement accepts an expression that evaluates to a `cell` type, if the expression is evaluated to be truthy (i.e. not equal to `0`), then the `if` block is executed, otherwise the optional `else` block is executed. This behaviour can be inverted using the `not` keyword. ``` if 13 { diff --git a/docs/loops.md b/docs/loops.md index adbf9c9..60f2496 100644 --- a/docs/loops.md +++ b/docs/loops.md @@ -1,58 +1,87 @@ -Looping in Mastermind has 3 main forms. These are the: +Mastermind currently supports three forms of loops: `while`, `drain` and `copy`. -- While Loop -- Drain Loop -- Copy Loop +It should be noted that there is no early breaking in any of these forms, so all clauses in a loop body are always executed in each iteration. 
-all 3 looping styles are essentially variations of a while loop +## While -## While Loop +The `while` loop operates similarly to other languages, accepting a condition expression, and a loop body. -The simplest is the `while` loop, which only supports cell references, currently not expressions: +The clauses inside the loop body are executed until the condition is falsy (i.e. equal to `0`). The condition is checked before each iteration. + +Note: currently `while` conditions must be direct variable references, this is subject to future compiler updates. ``` -while var { - //do stuff - var -= 1; - //etc +cell n = 5; +while n { + // do stuff + n -= 1; } +// n is now equal to 0 ``` -## Drain Loop +## Drain -The `drain` loop is a form of syntax sugar for a self decrementing while loop. This form of loop is extremely common in Brainfuck -so it has been shortened with this syntax +The `drain` loop mirrors a very common pattern found in Brainfuck programs: decrementing a cell. `drain` accepts an expression, a list of variables to 'drain into', and/or a loop body. + +If the expression is a direct variable reference, then the variable is decremented after each iteration. If not, it is evaluated in a temporary cell, then decremented after each iteration. ``` drain var { // do stuff } -``` - -shorthand for the following: -``` +// equivalent to: while var { // do stuff var -= 1; } ``` -This destructively loops as many times as the value in the cell being referenced, this can be used with expressions: +With expressions: -drain 10 {} +``` +drain 6 { + output 'a'; +} +// aaaaaa +``` -drain var - 6 {} +``` +cell x = 7; +drain x - 2 { + output 'b'; +} +// bbbbb +``` -Drain additionally supports the ability to add a variable `into` multiple other variables +In the above example, `x` is left unchanged. + +### Into + +If the `into` keyword is used, followed by a whitespace-separated list of target variables, the targets will be incremented after each iteration. 
``` -drain var into other_var other_var_2 *spread_array etc; -``` +cell i; +drain 10 into i { + output '0' + i; +} +// 0123456789 + +// equivalent to: +cell i; +cell ten = 10; +while ten { + output '0' + i; -Equivalent to: + i += 1; + ten -= 1; +} +``` ``` +drain var into other_var other_var_2 *spread_array; + +// equivalent to: drain var { other_var += 1; other_var_2 += 1; @@ -61,46 +90,32 @@ drain var { spread_array[2] += 1; // ... } - -// example of typical "for loop": -cell i; -drain 10 into i { - output '0' + i; // inefficient for the example -} -// "0123456789" -// equivalent to the following: -cell i = 0; -cell N = 10; -while N { - output '0' + i; - i += 1; - N -= 1; -} ``` -## Copy Loop +## Copy -The `copy` loop is similar to a `drain` loop however it is designed to preserve the initial state of the loop variable. -A copy loop is shorthand designed to replace the usage of a temporary variable in a drain loop. +The `copy` loop acts similarly to the `drain` loop, however the expression must be a direct variable reference, and it is left unchanged afterwards, and its original value is accessible within the loop body. ``` +cell var = 5; copy var { - // do stuff + output '0' + var; } -``` - -Equivalent to: +// 55555 -``` +// equivalent to: +cell var = 5; cell temp = var; while temp { - // do stuff + output '0' + var; + temp -= 1; } ``` -You can also `copy into` multiple other variables, similar to the `drain` loop: - ``` -copy var into other_var other_var_2 *spread_array etc; +cell y; +copy x into y { + +}; ``` diff --git a/docs/variables.md b/docs/variables.md index 5ca056d..6b7fcd9 100644 --- a/docs/variables.md +++ b/docs/variables.md @@ -1,5 +1,3 @@ -### Variables - #### Cells The base data type in Mastermind is the `cell`, this corresponds to a a single 8-bit cell on the Brainfuck tape. 
@@ -26,7 +24,7 @@ drain 5 { // stdout: 11bcd ``` -The simplest way to display text is to output valid ASCII characters, however if your Brainfuck implementation supports unicode, that is also possible by outputting multiple bytes. +The simplest way to display text is to output valid ASCII characters. If your Brainfuck implementation supports unicode, that is also possible by outputting multiple bytes. ``` output 240; @@ -99,14 +97,17 @@ three_S_structs[1].matrix_of_T_structs[3][0] = '5'; #### Note: Array indices must be compile-time constant integers -This is a limitation of Brainfuck, getting around this problem requires more runtime code is worth including for the sake of optimisations. You can implement equivalent behaviour using in-line Brainfuck, structs, and functions. +This is a limitation of Brainfuck, getting around this problem requires more runtime code than is reasonable to include by default, due to the goals of Mastermind. You can implement equivalent behaviour using in-line Brainfuck, structs, and functions. 
### Location specifiers The exact memory cells occupied by a variable can be specified: ``` -cell a @4 = 1; // value 1 at tape position 4 +// value 1 at tape position 4 +cell a @4 = 1; +// contiguous array of 1s, starting at cell -1 +cell[3] a @-1 = [1, 1, 1]; ``` #### Struct subfields From 5c9a00b55e4def5d0e87ca87bf4ab579dca2a3ec Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Wed, 12 Nov 2025 01:23:23 +1100 Subject: [PATCH 48/56] Update functions and inline brainfuck docos --- docs/functions.md | 25 +++++---- docs/inlinebrainfuck.md | 115 +++++++++++++++++++--------------------- docs/loops.md | 22 ++++---- docs/variables.md | 2 + 4 files changed, 81 insertions(+), 83 deletions(-) diff --git a/docs/functions.md b/docs/functions.md index a1378e6..24cf791 100644 --- a/docs/functions.md +++ b/docs/functions.md @@ -1,6 +1,6 @@ ### Functions -Mastermind supports a minimal functions system: Functions can be defined with a name and a fixed number of typed arguments. +Mastermind supports a minimal function system: functions can be defined with a name and a fixed number of typed arguments. ``` fn newline() { output '\n'; } @@ -18,7 +18,7 @@ cell g = 9; print_zeros(g); ``` -Functions are in-lined at compile-time, and all arguments are passed by reference. Values can be returned by editing the arguments, or editing variables in an outer scope, although the latter makes a function less portable. +Unlike most modern programming languages, functions are not considered first-class values. Functions in Mastermind are in-lined at compile-time, and all arguments are passed by reference. Values can be returned by editing passed in arguments, or editing variables in an outer scope, although the latter makes a function less portable. 
``` fn is_zero(cell in, cell out) { @@ -38,32 +38,31 @@ Example showing a function reading a variable from an outer scope: ``` fn print_global_g(cell count) { copy count { - output g; - output ' '; + output chr; } } -cell g = 'g'; -cell count = 11; +cell chr = 'g'; +cell count = 3; print_global_g(count); -// g g g g g g g g g g g +// ggg { // inner scope with a new 'g' allocation - cell g = 'G'; - count = 4; + cell chr = 'G'; + count = 5; print_global_g(count); - // G G G G + // GGGGG } -// same call again, now the inner 'G' has been freed +// same call again, now the inner chr has been freed print_global_g(count); -// g g g g +// ggg ``` #### Structs and Overloads -Example of supported behaviour: +Functions support overloads with different types or number of arguments. Examples of supported behaviour: ``` fn func1() { diff --git a/docs/inlinebrainfuck.md b/docs/inlinebrainfuck.md index b2cea17..f693e4e 100644 --- a/docs/inlinebrainfuck.md +++ b/docs/inlinebrainfuck.md @@ -2,14 +2,18 @@ In-line Brainfuck allows the programmer to define custom behaviour as if writing raw Brainfuck, much in the same way as C has in-line assembly syntax. +Basic example: + ``` -// This is its most basic form: // find the next cell that equals -1 bf { +[->+]- } +``` -// This is its more advanced form: +More advanced example: + +``` // input a line of lowercase letters and output the uppercase version // this is an intentionally inefficient example bf @3 clobbers var *spread_var etc { @@ -29,11 +33,11 @@ bf @3 clobbers var *spread_var etc { } ``` -It is the programmer's responsibility to clear used cells and return back to the cell in which they started the in-line Brainfuck context. If the programmer does not do this, any mastermind code after the in-line Brainfuck command will likely break. +It is the programmer's responsibility to clear used cells and return back to the cell in which they started the in-line Brainfuck context. 
If the programmer does not do this, any following Mastermind code may break. #### Memory location specifiers -For hand-tuning optimisations and in-line Brainfuck that reads from Mastermind variables, you can specify the location on the Brainfuck tape: +The exact location to start an in-line Brainfuck context can be specified: ``` cell var @3 = 4; @@ -45,19 +49,9 @@ bf @4 { // compiled: >>>><><><> ``` -Alternatively if using the 2D grid you can use a comma seperated list with a second value: - -``` - -bf @4,3 { - <><><> -} -// compiled: >>>>^^^<><><> -``` - #### Clobbering and Assertions -Mastermind will try to predict the value of cells at compile-time, so it can prevent unnecessary cell clean-ups and unreachable code (with optimisations turned on). If your in-line Brainfuck affects existing Mastermind variables, you should tell the compiler using the `clobbers` keyword, the syntax is similar to the `drain into` list: +With optimisations enabled, Mastermind will try to predict the value of cells at compile-time, so it can prevent unnecessary cell clean-ups and unreachable code. If your in-line Brainfuck affects existing Mastermind variables, you should tell the compiler using the `clobbers` keyword, the syntax is similar to the `drain into` target list: ``` bf clobbers var *spread_var other_var etc {} @@ -78,29 +72,56 @@ Asserting a variable as `unknown` is equivalent to clobbering. #### Embedded Mastermind -You can embed high-level Mastermind code within a Brainfuck context, this allows you to control precisely what the generated Brainfuck code is doing, whilst also taking advantage of the syntax features of Mastermind. +You can embed high-level Mastermind code within a Brainfuck context. During compilation the embedded Mastermind is compiled and the generated Brainfuck is inserted in place. 
``` -cell sum @0; +// input 3 n-length lines of input +bf { + >+++<,[ + { + cell input_char @0; + assert input_char unknown; + cell length_remaining @1; + assert length_remaining unknown; + + cell next_char @2; + cell next_length_remaining @3; + if not input_char - '\n' { + length_remaining -= 1; + } + if length_remaining { + drain length_remaining into next_length_remaining; + input next_char; + } + } + >>] +} +``` + +This can be done recursively, for example: -bf @0 { - >> - // read input (until eof) to the tape, nullifying any spaces or newlines - // (this is probably not a good practical example, ideas are appreciated) - ,[ +``` +// top-level Mastermind context +bf { + ++>> + { + // inner Mastermind context + bf { + ++>> { - cell c @0; - assert c unknown; // needed otherwise the compiler assumes c = 0 - - if not (c - '\n') { - c = 0; - } - if not (c - ' ') { - c = 0; - } + // inner inner Mastermind context + bf { + ++>> + { + //... + } + <<-- + } } - >, - ] + <<-- + } + } + <<-- } ``` @@ -125,31 +146,3 @@ bf { }} } ``` - -#### Craziness - -You can put in-line Brainfuck inside your embedded Mastermind. - -``` -bf { - ++++[ - { - cell i @0; - assert i unknown; - cell j @1 = i + 1; - - bf @1 { - [.+] - { - // even more layers are possible - bf { - { - output "h" - } - } - } - } - } - -] -} -``` diff --git a/docs/loops.md b/docs/loops.md index 60f2496..99b5c85 100644 --- a/docs/loops.md +++ b/docs/loops.md @@ -46,6 +46,8 @@ drain 6 { // aaaaaa ``` +The following example leaves `x` unchanged: + ``` cell x = 7; drain x - 2 { @@ -54,8 +56,6 @@ drain x - 2 { // bbbbb ``` -In the above example, `x` is left unchanged. - ### Into If the `into` keyword is used, followed by a whitespace-separated list of target variables, the targets will be incremented after each iteration. 
@@ -78,6 +78,8 @@ while ten { } ``` +Another example: + ``` drain var into other_var other_var_2 *spread_array; @@ -96,6 +98,15 @@ drain var { The `copy` loop acts similarly to the `drain` loop, however the expression must be a direct variable reference, and it is left unchanged afterwards, and its original value is accessible within the loop body. +``` +cell y; +copy x into y { + // loop body +}; +``` + +An equivalence example: + ``` cell var = 5; copy var { @@ -112,10 +123,3 @@ while temp { temp -= 1; } ``` - -``` -cell y; -copy x into y { - -}; -``` diff --git a/docs/variables.md b/docs/variables.md index 6b7fcd9..c070d79 100644 --- a/docs/variables.md +++ b/docs/variables.md @@ -8,6 +8,8 @@ cell c = 'g'; cell bool = true; // true/false equivalent to 1/0 ``` +Cells default to `0`. + #### Input/Output The `input` and `output` keywords in Mastermind correspond to the `,` and `.` operators in Brainfuck. `input` simply inputs the next byte from stdin, and `output` outputs a byte to stdout. From 49498b51190d4929f74593b5ad89fc75b158b3cf Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Wed, 12 Nov 2025 11:08:00 +1100 Subject: [PATCH 49/56] Update optimisation and variants docos --- docs/optimisations.md | 59 ++++++++++++++++++++++++----------------- docs/twodimensional.md | 57 --------------------------------------- docs/variants.md | 56 ++++++++++++++++++++++++++++++++++++++ src/components/Docs.tsx | 22 +++++++-------- 4 files changed, 101 insertions(+), 93 deletions(-) delete mode 100644 docs/twodimensional.md create mode 100644 docs/variants.md diff --git a/docs/optimisations.md b/docs/optimisations.md index 716060c..e5ca212 100644 --- a/docs/optimisations.md +++ b/docs/optimisations.md @@ -1,34 +1,34 @@ -### Optimisations +The Mastermind compiler includes optional optimisations for generated code. The original goal of Mastermind was to generate very minimal Brainfuck for use in Code Golf competitions, so most of these are aimed at reducing generated code length. 
-The optimisations in the Mastermind compiler are aimed at reducing the compiled Brainfuck code length, not necessarily execution speed. This is due to the original goal of the project: Code Golf in Brainfuck. + #### Cell Clearing -This optimises the clearing of cells by tracking their values at compile-time. For instance, if a cell can be proven at compile-time to have the value `2`, it is more efficient to clear with `--`, than the typical Brainfuck clear: `[-]`. + -#### Constants +Optimises clearing cells after they are de-allocated, it does this by tracking their values at compile-time and acting based on a cell's known value. For instance, if a cell can be proven at compile-time to have the value `2`, it is more efficient to clear with `--`, than the typical Brainfuck clear: `[-]`. -When large values are added in Brainfuck, the naive approach is to use the increment `-` operator for as many times as needed. The constants optimiser will use multiplication to shorten the code needed to add/subtract large values. Example: the value `45` can be achieved by either `+++++++++++++++++++++++++++++++++++++++++++++` or the shorter: `+++++[<+++++++++>-]>`. +#### Constants -#### Empty Blocks + -This detects if a code block is empty, and does not compile the clause associated. This is helpful for `if` statements and `copy` loops especially, as those can imply extra overhead for copying cells. +When large values are added in Brainfuck, the naive approach is to use the increment `-` operator for as many times as needed. The constants optimiser will use multiplication to shorten the code needed to add/subtract large values. Example: the value `46` can be achieved by either `++++++++++++++++++++++++++++++++++++++++++++++` or the shorter: `+++++[>+++++++++<-]>+` (5 \* 9 + 1). #### Generated Code -This is a final pass optimisation that operates directly on Brainfuck code, optimising subsets of programs which can be shortened while still guaranteeing equivalent behaviour. 
Example: + -``` ---->>><<<++ -``` +Optimises generated Brainfuck code by shortening trivial program segments. -Is equivalent to: +Currently this is limited to optimising segments of Brainfuck programs with the following operations: `+`, `-`, `>`, `<`, `[-]`. ``` +--->>><<<++ +// becomes: - ``` -It is difficult to analyse the behaviour of a Brainfuck program at compile time, so this optimiser is limited to subsets of a program's operations between I/O operations and loops (with exception). Example: +An end-to-end example: ``` cell h = 4; @@ -41,32 +41,41 @@ drain 10 { h += 4; j += 1; } -``` - -Compiles to: -``` +// compiles to: ++++>+++<++++++++++>>++++++++++[<+<++++>[-]+++++>-] +// after optimisation: +++++++++++++++>+++>++++++++++[-<[-]+++++<++++>>] ``` -After optimisation: +This system finds optimal equivalent segments for classic Brainfuck programs, however for the 2D Brainfuck variant it is not guaranteed, as finding the optimal path between memory cells in a 2D grid is more difficult. The _Generated Code Permutations_ setting enables an exhaustive search for the optimal path when using the 2D Brainfuck variant, otherwise a greedy approach is used. -``` -++++++++++++++>+++>++++++++++[-<[-]+++++<++++>>] -``` +#### Empty Blocks + + -For the 2D compiler extensions, this system can use an exhaustive search to determine the least movement between cells. This could become slow depending on the project, so it can be configured to use a greedy approach. This is done via the _Generated Code Permutations_ setting in the web IDE. +Detects if a code block is empty or has no effect on the program, and prunes the associated clause. #### Unreachable Loops -If a cell is known to have a value of `0` at compile time, and that cell is used to open a Brainfuck loop, then that entire loop is omitted. This is implemented at a low level, so it is agnostic of the syntactic structure that it is optimising, i.e `if`, `while`, `drain`. 
+ + +Brainfuck loops will be omitted if the cell they start on can be proven to be `0` at compile-time. ### Unimplemented Optimisations #### Memory Allocations -The goal of this is to optimise placing variables in tape memory to minimise movement between them. + + +// TODO + + #### Variable Usage -The goal of this is to automatically change the order of variable allocations/frees to ensure tape memory is allocated for the smallest amount of execution steps possible. This would allow allocation to be more efficient, as cells can be allocated which would otherwise be taken by variables that are not in use. + + +// TODO + + diff --git a/docs/twodimensional.md b/docs/twodimensional.md deleted file mode 100644 index bf75f72..0000000 --- a/docs/twodimensional.md +++ /dev/null @@ -1,57 +0,0 @@ -### Two-Dimensional Brainfuck - -Two-dimensional Brainfuck is an extension which provides an additional dimension to the memory tape. - -To support this, two new operations have been added to this extended version of the language: - -- `^`: move up one cell on the grid -- `v`: move down one cell on the grid - -#### Using 2D Brainfuck in Mastermind - -This behaviour must be enabled in the included Brainfuck interpreter. In the web IDE this is done via the settings modal. - -When this setting is enabled in isolation, the compiler will still generate typical 1D Brainfuck code. To make the compiler use multiple dimensions you must either: - -- Use a 2D-specific memory allocation algorithm -- Use a 2D location specifier on a variable -- Use in-line Brainfuck with 2D instructions - -### Memory Allocation Algorithms - -There are currently four allocation strategies implemented (including the original 1D). - -#### 1D Mastermind - -_1D Mastermind_ allocates the closest free cells to the right of the origin. 
- -#### 2D Mastermind - Zig Zag - -_2D Mastermind - Zig Zag_ treats the memory as a grid and fills in values from x 0 and some y value diagonally until it reaches y 0 and the same x value as the starting y. The table below shows the order that this is populated - -| 7 | | | | -| --- | --- | --- | --- | -| 4 | 8 | | | -| 2 | 5 | 9 | | -| 1 | 3 | 6 | 10 | - -#### 2D Mastermind - Spiral - -_2D Mastermind - Spiral_ starts from 0,0 and move in a Spiral such that each subsequent memory -value is only 1 step away from the last. This means that it will start by filling a 2x2 grid then from the bottom corner of -that grid it will iterate around that 2x2 filling a 4x4 area - -| 10 | 11 | 12 | -| --- | --- | --- | -| 9 | 2 | 3 | -| 8 | 1 | 4 | -| 7 | 6 | 5 | - -#### 2D Mastermind - Tiles - -_2D Mastermind - Tiles_ allocates a tile of memory and check all cells in that area before expanding to check new cells. This algorithm starts at 0,0 with a 1x1 area then will move down to -1, -1 and check a new 3x3 area it will check each area column by column from the bottom row up so (-1, -1), (0, -1), (1, -1), (-1, 0)... - -| 4 | 6 | 9 | -| --- | --- | --- | -| 3 | 1 | 8 | -| 2 | 5 | 7 | diff --git a/docs/variants.md b/docs/variants.md new file mode 100644 index 0000000..0daf3e7 --- /dev/null +++ b/docs/variants.md @@ -0,0 +1,56 @@ +The Mastermind compiler can be extended to support Brainfuck variants. + +### 2D Brainfuck + +Mastermind currently supports two-dimensional Brainfuck, this is a Brainfuck variant with an additional dimension in the memory array. + +2D Brainfuck support can be enabled in the compiler settings in the web IDE, adding the following: + +New opcodes for in-line Brainfuck contexts and in generated Brainfuck code: + +- `^`: move up one cell in the grid +- `v`: move down one cell in the grid +- + +#### Memory Allocation Algorithms + +##### Default + +Allocates the closest free cells to the right of the origin. 
+ +##### Zig Zag + +// TODO + + + +##### Spiral + +// TODO + + + +##### Tiles + +// TODO + + diff --git a/src/components/Docs.tsx b/src/components/Docs.tsx index e4f2f52..08961ef 100644 --- a/src/components/Docs.tsx +++ b/src/components/Docs.tsx @@ -13,23 +13,23 @@ import loops from "../../docs/loops.md?raw"; import functions from "../../docs/functions.md?raw"; import inlinebrainfuck from "../../docs/inlinebrainfuck.md?raw"; import standardlib from "../../docs/standardlib.md?raw"; -import twodimensional from "../../docs/twodimensional.md?raw"; +import variants from "../../docs/variants.md?raw"; import optimisations from "../../docs/optimisations.md?raw"; import { FaSolidArrowLeftLong, FaSolidArrowRightLong } from "solid-icons/fa"; const DocsModal: Component<{ style?: JSX.CSSProperties }> = () => { const app = useAppContext()!; const docs = { - Introduction: intro, - Brainfuck: brainfuck, - Variables: variables, - Conditionals: conditionals, - Loops: loops, - Functions: functions, - "Inline Brainfuck": inlinebrainfuck, - "Standard Library": standardlib, - "2D Mastermind": twodimensional, - Optimisations: optimisations, + ["Introduction"]: intro, + ["Brainfuck"]: brainfuck, + ["Variables"]: variables, + ["Conditionals"]: conditionals, + ["Loops"]: loops, + ["Functions"]: functions, + ["Inline Brainfuck"]: inlinebrainfuck, + ["Standard Library"]: standardlib, + ["Brainfuck Variants"]: variants, + ["Optimisations"]: optimisations, }; const titles = Object.keys(docs); const [selected, setSelected] = createSignal(titles[0]); From df1ce84fe68b19847a44a7a41e585bc6f32e882e Mon Sep 17 00:00:00 2001 From: Heath Manning Date: Wed, 12 Nov 2025 13:13:26 +1100 Subject: [PATCH 50/56] Remove unimplemented optimisation configs, collate and move documentation, add git commit hash to web IDE --- .gitignore | 2 + README.md | 8 +- compiler/src/backend/bf.rs | 5 +- compiler/src/backend/bf2d.rs | 6 +- compiler/src/backend/common.rs | 2 +- compiler/src/brainfuck_optimiser.rs | 8 +- 
compiler/src/misc.rs | 19 +- compiler/src/tests.rs | 24 +- docs/brainfuck.md | 34 -- docs/conditionals.md | 25 - docs/functions.md | 98 ---- docs/inlinebrainfuck.md | 148 ------ docs/intro.md | 19 - docs/loops.md | 125 ----- docs/optimisations.md | 81 --- docs/standardlib.md | 77 --- docs/variables.md | 151 ------ docs/variants.md | 56 -- load_env.sh | 2 + package.json | 5 +- reference.md | 788 ++++++++++++++++++++++++++++ runningMastermind.md | 36 -- src/App.css | 8 +- src/App.tsx | 4 +- src/components/Docs.tsx | 104 ---- src/components/Settings.tsx | 264 +++++----- src/panels/CompilerPanel.tsx | 6 +- src/panels/SideBar.tsx | 115 ++-- src/panels/settings.css | 32 +- src/vite-env.d.ts | 10 + yarn.lock | 12 + 31 files changed, 1076 insertions(+), 1198 deletions(-) delete mode 100644 docs/brainfuck.md delete mode 100644 docs/conditionals.md delete mode 100644 docs/functions.md delete mode 100644 docs/inlinebrainfuck.md delete mode 100644 docs/intro.md delete mode 100644 docs/loops.md delete mode 100644 docs/optimisations.md delete mode 100644 docs/standardlib.md delete mode 100644 docs/variables.md delete mode 100644 docs/variants.md create mode 100755 load_env.sh create mode 100644 reference.md delete mode 100644 runningMastermind.md delete mode 100644 src/components/Docs.tsx diff --git a/.gitignore b/.gitignore index 8b7e502..7337916 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ dist-ssr *.njsproj *.sln *.sw? + +.env diff --git a/README.md b/README.md index c082f61..c63cb5e 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,11 @@ Mastermind is a programming language designed to compile to the esoteric language _Brainfuck_. -Brainfuck is essentially a modern interpretation of the classical Turing machine. It consists of a tape of 8-bit values, with simple increment/decrement, move left/right, and control flow operations. The full language only uses 8 control characters: `+-><.,[]`. 
+Brainfuck is essentially a modern interpretation of the classical Turing machine. It consists of an array (or _tape_) of 8-bit values, with simple increment/decrement, move left/right, and control flow operations. The full language only uses 8 control characters: `+-><.,[]`. -Imagine if C was designed for computer architectures that run Brainfuck directly, that is what Mastermind is intended to be. +Imagine an alternate reality where C was designed for computer architectures that run Brainfuck directly, that is what Mastermind is intended to be. + +Mastermind language/compiler reference can be found here: ## Development and Setup @@ -17,7 +19,7 @@ Imagine if C was designed for computer architectures that run Brainfuck directly - Run `yarn build:grammar`. - Run `yarn dev`, then follow the link to http://localhost:5173. -Commits to _dev_ and _main_ are published to https://staging.mastermind.lostpixels.org and https://mastermind.lostpixels.org respectively. +Pushes to _dev_ and _main_ are published to https://staging.mastermind.lostpixels.org and https://mastermind.lostpixels.org respectively. 
### Overview: diff --git a/compiler/src/backend/bf.rs b/compiler/src/backend/bf.rs index 18356a7..3aef49b 100644 --- a/compiler/src/backend/bf.rs +++ b/compiler/src/backend/bf.rs @@ -2,10 +2,7 @@ use super::common::{ BrainfuckBuilder, BrainfuckBuilderData, BrainfuckProgram, CellAllocator, CellAllocatorData, OpcodeVariant, TapeCellVariant, }; -use crate::{ - macros::macros::{r_assert, r_panic}, - parser::tokeniser::Token, -}; +use crate::macros::macros::{r_assert, r_panic}; pub type TapeCell = i32; impl TapeCellVariant for TapeCell { diff --git a/compiler/src/backend/bf2d.rs b/compiler/src/backend/bf2d.rs index 6d6c8a6..aff0545 100644 --- a/compiler/src/backend/bf2d.rs +++ b/compiler/src/backend/bf2d.rs @@ -2,10 +2,7 @@ use super::common::{ BrainfuckBuilder, BrainfuckBuilderData, BrainfuckProgram, CellAllocator, CellAllocatorData, OpcodeVariant, TapeCellVariant, }; -use crate::{ - macros::macros::{r_assert, r_panic}, - parser::tokeniser::Token, -}; +use crate::macros::macros::{r_assert, r_panic}; use std::{fmt::Display, hash::Hash}; @@ -112,6 +109,7 @@ impl Display for TapeCell2D { } } +// TODO: refactor impl CellAllocator for CellAllocatorData { /// Check if the desired number of cells can be allocated to the right of a given location fn check_allocatable(&mut self, location: &TapeCell2D, size: usize) -> bool { diff --git a/compiler/src/backend/common.rs b/compiler/src/backend/common.rs index b45ffce..816ea5c 100644 --- a/compiler/src/backend/common.rs +++ b/compiler/src/backend/common.rs @@ -3,7 +3,7 @@ use crate::{ frontend::{CellLocation, Instruction, MemoryId}, macros::macros::{r_assert, r_panic}, misc::{MastermindConfig, MastermindContext}, - parser::{tokeniser::Token, types::TapeCellLocation}, + parser::types::TapeCellLocation, }; use std::{ diff --git a/compiler/src/brainfuck_optimiser.rs b/compiler/src/brainfuck_optimiser.rs index a02e62d..9b5d153 100644 --- a/compiler/src/brainfuck_optimiser.rs +++ b/compiler/src/brainfuck_optimiser.rs @@ -222,8 +222,8 @@ 
mod bf_optimiser_tests { optimise_generated_all_permutations: false, optimise_cell_clearing: false, optimise_unreachable_loops: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 0, @@ -237,8 +237,8 @@ mod bf_optimiser_tests { optimise_generated_all_permutations: true, optimise_cell_clearing: false, optimise_unreachable_loops: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 0, diff --git a/compiler/src/misc.rs b/compiler/src/misc.rs index 17c1774..bdbf7c2 100644 --- a/compiler/src/misc.rs +++ b/compiler/src/misc.rs @@ -4,15 +4,11 @@ pub struct MastermindConfig { pub optimise_generated_code: bool, // TODO: rename this: (turn on exhaustive search for solving 2D brainfuck optimisation) pub optimise_generated_all_permutations: bool, + // track cell value and clear with constant addition if possible pub optimise_cell_clearing: bool, // track cell value and skip loops which can never be entered pub optimise_unreachable_loops: bool, - // TODO: prune variables that aren't needed? Maybe combine with empty blocks stuff - pub optimise_variable_usage: bool, - // TODO: optimise memory layout to minimise tape head movement - // recommended to turn on these next two together - pub optimise_memory_allocation: bool, // golf constants, useful for single characters or large numbers // probably not great with strings yet, may need another optimisation for that pub optimise_constants: bool, @@ -26,6 +22,10 @@ pub struct MastermindConfig { // '2D Mastermind - Nearest' 3 pub memory_allocation_method: u8, pub enable_2d_grid: bool, + // TODO: prune variables that aren't needed? 
Maybe combine with empty blocks stuff + // pub optimise_variable_usage: bool, + // recommended to turn on these next two together + // pub optimise_memory_allocation: bool, } impl Default for MastermindConfig { @@ -35,8 +35,8 @@ impl Default for MastermindConfig { optimise_generated_all_permutations: false, optimise_cell_clearing: false, optimise_unreachable_loops: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 0, @@ -46,14 +46,15 @@ impl Default for MastermindConfig { } impl MastermindConfig { + // TODO: rethink this bitmask thing pub fn new(optimise_bitmask: usize) -> MastermindConfig { MastermindConfig { optimise_generated_code: (optimise_bitmask & 0b00000001) > 0, optimise_generated_all_permutations: (optimise_bitmask & 0b00001000) > 0, optimise_cell_clearing: (optimise_bitmask & 0b00000010) > 0, optimise_unreachable_loops: (optimise_bitmask & 0b00000100) > 0, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_constants: false, optimise_empty_blocks: false, memory_allocation_method: 0, diff --git a/compiler/src/tests.rs b/compiler/src/tests.rs index 23a99bf..910cb61 100644 --- a/compiler/src/tests.rs +++ b/compiler/src/tests.rs @@ -22,8 +22,8 @@ pub mod black_box_tests { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_unreachable_loops: false, optimise_constants: false, optimise_empty_blocks: false, @@ -35,8 +35,8 @@ pub mod black_box_tests { optimise_generated_code: true, optimise_generated_all_permutations: false, optimise_cell_clearing: true, - 
optimise_variable_usage: true, - optimise_memory_allocation: true, + // optimise_variable_usage: true, + // optimise_memory_allocation: true, optimise_unreachable_loops: true, optimise_constants: true, optimise_empty_blocks: true, @@ -48,8 +48,8 @@ pub mod black_box_tests { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_unreachable_loops: false, optimise_constants: false, optimise_empty_blocks: false, @@ -61,8 +61,8 @@ pub mod black_box_tests { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_unreachable_loops: false, optimise_constants: false, optimise_empty_blocks: false, @@ -74,8 +74,8 @@ pub mod black_box_tests { optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_unreachable_loops: false, optimise_constants: false, optimise_empty_blocks: false, @@ -2596,8 +2596,8 @@ cell a = 'G'; optimise_generated_code: false, optimise_generated_all_permutations: false, optimise_cell_clearing: false, - optimise_variable_usage: false, - optimise_memory_allocation: false, + // optimise_variable_usage: false, + // optimise_memory_allocation: false, optimise_unreachable_loops: false, optimise_constants: false, optimise_empty_blocks: false, diff --git a/docs/brainfuck.md b/docs/brainfuck.md deleted file mode 100644 index cb2fbce..0000000 --- a/docs/brainfuck.md +++ /dev/null @@ -1,34 +0,0 @@ -### Brainfuck - -Brainfuck is an esoteric programming 
language, originally designed as a theoretical example of a Turing complete language with an extremely minimal compiler. The name is due to its difficulty, it is significantly more difficult to create complex programs than in any popular modern language. - -### Specification - -When a Brainfuck program is run, it operates on a array/tape of cells, performing operations on the tape. Each cell contains an integer, initialised to 0 by default. The program operates on one cell at a time based on the position of a "tape head". Brainfuck supports the following operations: - -- `+`: increments the value of the current cell -- `-`: decrement the value of the current cell -- `>`: move the tape head one cell to the right -- `<`: move the tape head one cell to the left -- `.`: output the current cell as a byte to stdout -- `,`: input a byte from stdin, overwriting the current cell -- `[`: jump to the corresponding `]` if the current cell is 0 -- `]`: jump to the corresponding `[` if the current cell is not 0 - -A Brainfuck program consists of a list of these commands, which are executed sequentially. The program terminates if the final operation in the list is executed. - -### Interpreter Implementation Details - -The Mastermind IDE and compiler library contains an implementation of a Brainfuck interpreter. This implementation is intended to match the behaviour of the most popular Brainfuck implementations: - -#### 8-bit Wrapping Cells - -In this implementation, each cell is an 8-bit integer that wraps if an increment or decrement operation overflows or underflows. - -E.g. given the current tape cell value is `255`, after an increment (`+`), the cell value is now `0`. - -Similarly: `0`, after a decrement (`-`) becomes `255` - -#### Infinite Bidirectional Tape - -In this implementation, the tape extends infinitely in both directions. 
diff --git a/docs/conditionals.md b/docs/conditionals.md deleted file mode 100644 index 5a93544..0000000 --- a/docs/conditionals.md +++ /dev/null @@ -1,25 +0,0 @@ -Mastermind supports basic `if`/`else` statements. An `if` statement accepts an expression that evaluates to a `cell` type, if the expression is evaluated to be truthy (i.e. not equal to `0`), then the `if` block is executed, otherwise the optional `else` block is executed. This behaviour can be inverted using the `not` keyword. - -``` -if 13 { - output "13"; -} - -if not true { - // unreachable -} - -cell var = 4; -if var { - output "true"; -} else { - output "false"; -} - -// typical equivalence use-case: -if not var - 10 { - // == -} else { - // != -} -``` diff --git a/docs/functions.md b/docs/functions.md deleted file mode 100644 index 24cf791..0000000 --- a/docs/functions.md +++ /dev/null @@ -1,98 +0,0 @@ -### Functions - -Mastermind supports a minimal function system: functions can be defined with a name and a fixed number of typed arguments. - -``` -fn newline() { output '\n'; } - -fn print_zeros(cell num) { - copy num { - output '0'; - } - newline(); -} - -// expressions as arguments are currently not supported, -// i.e. print_zeros(9) -cell g = 9; -print_zeros(g); -``` - -Unlike most modern programming languages, functions are not considered first-class values. Functions in Mastermind are in-lined at compile-time, and all arguments are passed by reference. Values can be returned by editing passed in arguments, or editing variables in an outer scope, although the latter makes a function less portable. 
- -``` -fn is_zero(cell in, cell out) { - out = true; - if in { - out = false; - } -} - -cell value = 'h'; -cell falsy; -is_zero(value, falsy); -``` - -Example showing a function reading a variable from an outer scope: - -``` -fn print_global_g(cell count) { - copy count { - output chr; - } -} - -cell chr = 'g'; -cell count = 3; -print_global_g(count); -// ggg - -{ - // inner scope with a new 'g' allocation - cell chr = 'G'; - count = 5; - print_global_g(count); - // GGGGG -} - -// same call again, now the inner chr has been freed -print_global_g(count); -// ggg -``` - -#### Structs and Overloads - -Functions support overloads with different types or number of arguments. Examples of supported behaviour: - -``` -fn func1() { - output '1'; -} -fn func1(cell a) { - output '2'; -} -fn func1(cell a, cell b) { - output '3'; -} -struct X { cell a; } -fn func1(struct X x) { - output '4'; -} -struct Y { cell a; } -fn func1(struct Y y) { - output '5'; -} -fn func1(cell a, struct X x, struct Y y) { - output '6'; -} -cell n; -struct X x; -struct Y y; -func1(); -func1(n); -func1(n, n); -func1(x); -func1(y); -func1(n, x, y); -// 123456 -``` diff --git a/docs/inlinebrainfuck.md b/docs/inlinebrainfuck.md deleted file mode 100644 index f693e4e..0000000 --- a/docs/inlinebrainfuck.md +++ /dev/null @@ -1,148 +0,0 @@ -### In-line Brainfuck - -In-line Brainfuck allows the programmer to define custom behaviour as if writing raw Brainfuck, much in the same way as C has in-line assembly syntax. - -Basic example: - -``` -// find the next cell that equals -1 -bf { - +[->+]- -} -``` - -More advanced example: - -``` -// input a line of lowercase letters and output the uppercase version -// this is an intentionally inefficient example -bf @3 clobbers var *spread_var etc { - ,----------[++++++++++>,----------] - <[<]> - [ - { - cell g @0; - assert g unknown; - output g + ('A' - 'a'); - // embedded Mastermind! 
- } - > - ] - // now clear and return - <[[-]<]> -} -``` - -It is the programmer's responsibility to clear used cells and return back to the cell in which they started the in-line Brainfuck context. If the programmer does not do this, any following Mastermind code may break. - -#### Memory location specifiers - -The exact location to start an in-line Brainfuck context can be specified: - -``` -cell var @3 = 4; -// compiled: >>>++++ - -bf @4 { - <><><> -} -// compiled: >>>><><><> -``` - -#### Clobbering and Assertions - -With optimisations enabled, Mastermind will try to predict the value of cells at compile-time, so it can prevent unnecessary cell clean-ups and unreachable code. If your in-line Brainfuck affects existing Mastermind variables, you should tell the compiler using the `clobbers` keyword, the syntax is similar to the `drain into` target list: - -``` -bf clobbers var *spread_var other_var etc {} -``` - -The compiler will now assume nothing about the values of those variables afterwards. - -If instead you want to tell the compiler specifically that a variable has become a certain value, you can use `assert`: - -``` -assert var equals 3; -// most common use cases: -assert var equals 0; -assert var unknown; -``` - -Asserting a variable as `unknown` is equivalent to clobbering. - -#### Embedded Mastermind - -You can embed high-level Mastermind code within a Brainfuck context. During compilation the embedded Mastermind is compiled and the generated Brainfuck is inserted in place. 
- -``` -// input 3 n-length lines of input -bf { - >+++<,[ - { - cell input_char @0; - assert input_char unknown; - cell length_remaining @1; - assert length_remaining unknown; - - cell next_char @2; - cell next_length_remaining @3; - if not input_char - '\n' { - length_remaining -= 1; - } - if length_remaining { - drain length_remaining into next_length_remaining; - input next_char; - } - } - >>] -} -``` - -This can be done recursively, for example: - -``` -// top-level Mastermind context -bf { - ++>> - { - // inner Mastermind context - bf { - ++>> - { - // inner inner Mastermind context - bf { - ++>> - { - //... - } - <<-- - } - } - <<-- - } - } - <<-- -} -``` - -The compiler cannot guarantee the global head position at compile time within an in-line Brainfuck context. Therefore memory location specifiers are relative to the current embedded Mastermind context, not the entire program. - -Also, top-level variables are not cleared by default in Mastermind contexts, this allows you to "leave" variables in cells for your Brainfuck to use. If you want variables in your embedded Mastermind to be automatically cleared, you can open a scope at the top level: - -``` -bf { - ++----++[][][<><><>] // the program doesn't matter for this example - { - // variables here will not be cleared - cell g @2; - assert g unknown; - { - // variables here will be cleared - let b = 32; - } - } - {{ - // self-cleaning Mastermind code here - }} -} -``` diff --git a/docs/intro.md b/docs/intro.md deleted file mode 100644 index 29aeb35..0000000 --- a/docs/intro.md +++ /dev/null @@ -1,19 +0,0 @@ -Mastermind is a programming language designed to compile to the well-known esoteric language "Brainfuck". - -Brainfuck is essentially a modern interpretation of the classical Turing machine. It consists of a tape of 8-bit values, with simple increment/decrement, move left/right, input/output, and looping operations. The full language only uses 8 control characters: `+-><.,[]`. 
- -Imagine if C was designed for computer architectures that run Brainfuck, that is what Mastermind is intended to be. - -## Contents - -This documentation currently includes the following articles: - -- Brainfuck -- Variables -- Conditionals -- Loops -- Functions -- Inline Brainfuck -- Standard Library -- 2D Mastermind -- Optimisations diff --git a/docs/loops.md b/docs/loops.md deleted file mode 100644 index 99b5c85..0000000 --- a/docs/loops.md +++ /dev/null @@ -1,125 +0,0 @@ -Mastermind currently supports three forms of loops: `while`, `drain` and `copy`. - -It should be noted that there is no early breaking in any of these forms, so all clauses in a loop body are always executed in each iteration. - -## While - -The `while` loop operates similarly to other languages, accepting a condition expression, and a loop body. - -The clauses inside the loop body are executed until the condition is falsy (i.e. equal to `0`). The condition is checked before each iteration. - -Note: currently `while` conditions must be direct variable references, this is subject to future compiler updates. - -``` -cell n = 5; -while n { - // do stuff - n -= 1; -} -// n is now equal to 0 -``` - -## Drain - -The `drain` loop mirrors a very common pattern found in Brainfuck programs: decrementing a cell. `drain` accepts an expression, a list of variables to 'drain into', and/or a loop body. - -If the expression is a direct variable reference, then the variable is decremented after each iteration. If not, it is evaluated in a temporary cell, then decremented after each iteration. 
- -``` -drain var { - // do stuff -} - -// equivalent to: -while var { - // do stuff - var -= 1; -} -``` - -With expressions: - -``` -drain 6 { - output 'a'; -} -// aaaaaa -``` - -The following example leaves `x` unchanged: - -``` -cell x = 7; -drain x - 2 { - output 'b'; -} -// bbbbb -``` - -### Into - -If the `into` keyword is used, followed by a whitespace-separated list of target variables, the targets will be incremented after each iteration. - -``` -cell i; -drain 10 into i { - output '0' + i; -} -// 0123456789 - -// equivalent to: -cell i; -cell ten = 10; -while ten { - output '0' + i; - - i += 1; - ten -= 1; -} -``` - -Another example: - -``` -drain var into other_var other_var_2 *spread_array; - -// equivalent to: -drain var { - other_var += 1; - other_var_2 += 1; - spread_array[0] += 1; - spread_array[1] += 1; - spread_array[2] += 1; - // ... -} -``` - -## Copy - -The `copy` loop acts similarly to the `drain` loop, however the expression must be a direct variable reference, and it is left unchanged afterwards, and its original value is accessible within the loop body. - -``` -cell y; -copy x into y { - // loop body -}; -``` - -An equivalence example: - -``` -cell var = 5; -copy var { - output '0' + var; -} -// 55555 - -// equivalent to: -cell var = 5; -cell temp = var; -while temp { - output '0' + var; - - temp -= 1; -} -``` diff --git a/docs/optimisations.md b/docs/optimisations.md deleted file mode 100644 index e5ca212..0000000 --- a/docs/optimisations.md +++ /dev/null @@ -1,81 +0,0 @@ -The Mastermind compiler includes optional optimisations for generated code. The original goal of Mastermind was to generate very minimal Brainfuck for use in Code Golf competitions, so most of these are aimed at reducing generated code length. - - - -#### Cell Clearing - - - -Optimises clearing cells after they are de-allocated, it does this by tracking their values at compile-time and acting based on a cell's known value. 
For instance, if a cell can be proven at compile-time to have the value `2`, it is more efficient to clear with `--`, than the typical Brainfuck clear: `[-]`. - -#### Constants - - - -When large values are added in Brainfuck, the naive approach is to use the increment `-` operator for as many times as needed. The constants optimiser will use multiplication to shorten the code needed to add/subtract large values. Example: the value `46` can be achieved by either `++++++++++++++++++++++++++++++++++++++++++++++` or the shorter: `+++++[>+++++++++<-]>+` (5 \* 9 + 1). - -#### Generated Code - - - -Optimises generated Brainfuck code by shortening trivial program segments. - -Currently this is limited to optimising segments of Brainfuck programs with the following operations: `+`, `-`, `>`, `<`, `[-]`. - -``` ---->>><<<++ -// becomes: -- -``` - -An end-to-end example: - -``` -cell h = 4; -cell j = 3; - -h += 10; - -drain 10 { - j = 5; - h += 4; - j += 1; -} - -// compiles to: -++++>+++<++++++++++>>++++++++++[<+<++++>[-]+++++>-] -// after optimisation: -++++++++++++++>+++>++++++++++[-<[-]+++++<++++>>] -``` - -This system finds optimal equivalent segments for classic Brainfuck programs, however for the 2D Brainfuck variant it is not guaranteed, as finding the optimal path between memory cells in a 2D grid is more difficult. The _Generated Code Permutations_ setting enables an exhaustive search for the optimal path when using the 2D Brainfuck variant, otherwise a greedy approach is used. - -#### Empty Blocks - - - -Detects if a code block is empty or has no effect on the program, and prunes the associated clause. - -#### Unreachable Loops - - - -Brainfuck loops will be omitted if the cell they start on can be proven to be `0` at compile-time. 
- -### Unimplemented Optimisations - -#### Memory Allocations - - - -// TODO - - - -#### Variable Usage - - - -// TODO - - diff --git a/docs/standardlib.md b/docs/standardlib.md deleted file mode 100644 index 5792084..0000000 --- a/docs/standardlib.md +++ /dev/null @@ -1,77 +0,0 @@ -### Mastermind Standard Library - -Currently the Mastermind standard library is very limited, and is effectively a set of example programs included in the web IDE and source repository. - -#### Including files - -You can include/import other files using preprocessor directives. The Mastermind preprocessor is intended to mirror the C preprocessor, however it currently only supports the `#include` directive. - -The following is a basic example: - -``` -// file1.mmi -struct H { - cell a; -} -fn print(struct H h) { - output h.a; -} -``` - -``` -// main file being compiled -#include "file1.mmi" - -struct H h; -h.a = 64; -print(h); -// @ -``` - -#### Standard Library Examples - -The most mature files in the included examples are the following: - -- `bitops`: bitshifting operations for cell types -- `i8`: signed type for 8-bit integers and supporting functions -- `u8`: common supporting functions for cell types -- `u16`: a 16-bit unsigned integer type and supporting functions -- `ifp16`: a signed 16-bit fixed-point number type and supporting functions - -NOTE: due to current lack of header-guard support, importing multiple of these will likely cause a compiler error, until this is implemented, the best way to work around this is to only include `ifp16` as that includes the others. 
- -Example usage: - -``` -#include - -// read a 16 bit number from stdin, add 55, then print - -struct u16 n; -read(n); - -cell ff = 55; -add(n, ff); -print(n); -output ' '; -debug(n); // print the binary representation -// example input: 16000 -// output: 16055 0011111010110111 -``` - -Example fixed-point usage: - -``` -#include - -struct ifp16 n; -_99p99609375(n); // constant 99.99609375 -struct ifp16 m; -__1p5(m); // constant -1.5 - -divide(n, m); -print(n); -output ' '; -debug(n); -// -66.66 10111101.01010110 -``` diff --git a/docs/variables.md b/docs/variables.md deleted file mode 100644 index c070d79..0000000 --- a/docs/variables.md +++ /dev/null @@ -1,151 +0,0 @@ -#### Cells - -The base data type in Mastermind is the `cell`, this corresponds to a a single 8-bit cell on the Brainfuck tape. - -``` -cell var = 56; -cell c = 'g'; -cell bool = true; // true/false equivalent to 1/0 -``` - -Cells default to `0`. - -#### Input/Output - -The `input` and `output` keywords in Mastermind correspond to the `,` and `.` operators in Brainfuck. `input` simply inputs the next byte from stdin, and `output` outputs a byte to stdout. - -``` -// stdin: 00abc -cell g; -drain 5 { - // do this 5 times - input g; - g += 1; - output g; -} -// stdout: 11bcd -``` - -The simplest way to display text is to output valid ASCII characters. If your Brainfuck implementation supports unicode, that is also possible by outputting multiple bytes. - -``` -output 240; -output 159; -output 164; -output 145; -output 10; -// displays 🤑 (emoji with green cash for tongue) -``` - -#### Cell Arrays - -Variables can also be defined as contiguous arrays of cells. - -``` -// multi-cell: -cell[4] array_example = [1, 2, 3, 4]; -cell[5] string_example = "hello"; -cell[2] foo; -foo[0] = 204; -``` - -#### Structs - -Structure types can be defined with named fields, then instantiated as variables. 
- -``` -struct struct_name { - cell x; - cell y; - cell[5] zzzzz; -} - -struct struct_name s; -s.x = 4; -s.y = 123; -s.zzzzz[0] += 3; -s.zzzzz[4] = 180; - -// nested struct: -struct Nested { - struct struct_name n; -} -``` - -### Structs and Arrays - -Any type can be repeated into an array/contiguous allocation. This includes cells, structs, arrays of cells, and arrays of structs. - -``` -cell[4][6] mult_arr; // a 6-length array of cell[4] arrays -cell[4][6][2] mult_arr; // 2 * (6-length arrays of cell[4] arrays) - -struct T { - cell a; - cell[4][2] b; -} - -struct T[10] ten_T_structs; -ten_T_structs[4].b[1][3] = 45; - -struct S { - struct T[2][4] matrix_of_T_structs; - cell other; -} - -struct S[3] three_S_structs; -three_S_structs[1].matrix_of_T_structs[3][0] = '5'; -``` - -#### Note: Array indices must be compile-time constant integers - -This is a limitation of Brainfuck, getting around this problem requires more runtime code than is reasonable to include by default, due to the goals of Mastermind. You can implement equivalent behaviour using in-line Brainfuck, structs, and functions. 
- -### Location specifiers - -The exact memory cells occupied by a variable can be specified: - -``` -// value 1 at tape position 4 -cell a @4 = 1; -// contiguous array of 1s, starting at cell -1 -cell[3] a @-1 = [1, 1, 1]; -``` - -#### Struct subfields - -The byte-order and positioning of a struct's subfields can be specified: - -``` -struct T { - cell a @1; - cell b[2] @3; -} -// struct T's layout: -// (-, a, -, b[0], b[1]) -// '-' denotes an untracked padding cell -``` - -#### Variable - -When using in-line Brainfuck (see other document), the Brainfuck scope's starting position can be specified with variables: - -``` -cell d; -bf @d { - // brainfuck code here -} - -struct G { - cell h; - cell i; - cell j; -} -struct G g; - -bf @g { - // starts on the first cell of g's allocation -} -// equivalent to: -bf @g.h {} -``` diff --git a/docs/variants.md b/docs/variants.md deleted file mode 100644 index 0daf3e7..0000000 --- a/docs/variants.md +++ /dev/null @@ -1,56 +0,0 @@ -The Mastermind compiler can be extended to support Brainfuck variants. - -### 2D Brainfuck - -Mastermind currently supports two-dimensional Brainfuck, this is a Brainfuck variant with an additional dimension in the memory array. - -2D Brainfuck support can be enabled in the compiler settings in the web IDE, adding the following: - -New opcodes for in-line Brainfuck contexts and in generated Brainfuck code: - -- `^`: move up one cell in the grid -- `v`: move down one cell in the grid -- - -#### Memory Allocation Algorithms - -##### Default - -Allocates the closest free cells to the right of the origin. 
- -##### Zig Zag - -// TODO - - - -##### Spiral - -// TODO - - - -##### Tiles - -// TODO - - diff --git a/load_env.sh b/load_env.sh new file mode 100755 index 0000000..429e3ec --- /dev/null +++ b/load_env.sh @@ -0,0 +1,2 @@ +echo "VITE_GIT_COMMIT_HASH=$(git rev-parse --short HEAD)" > .env +echo "VITE_GIT_COMMIT_BRANCH=$(git branch --show-current)" >> .env diff --git a/package.json b/package.json index c620392..7fd3cfc 100644 --- a/package.json +++ b/package.json @@ -5,10 +5,10 @@ "type": "module", "scripts": { "test:wasm": "cd compiler && cargo test", - "dev": "vite", + "dev": "./load_env.sh && vite", "build:grammar": "yarn run lezer-generator src/lexer/mastermind.grammar -o src/lexer/mastermind_parser.js", "build:wasm": "cd compiler && wasm-pack build --target web", - "build": "tsc && vite build", + "build": "./load_env.sh && tsc && vite build", "profile:wasm": "yarn build:wasm --profiling && yarn build && twiggy top -n 20 compiler/pkg/mastermind_bg.wasm", "preview": "vite preview" }, @@ -30,6 +30,7 @@ }, "devDependencies": { "@lezer/generator": "^1.5.1", + "@types/node": "^24.10.1", "@types/uuid": "^9.0.7", "typescript": "^5.2.2", "vite": "^5.0.0", diff --git a/reference.md b/reference.md new file mode 100644 index 0000000..6c2abce --- /dev/null +++ b/reference.md @@ -0,0 +1,788 @@ +# Mastermind reference + +## Introduction + +Mastermind is a programming language designed to compile to the well-known esoteric language "Brainfuck". + +Brainfuck is essentially a modern interpretation of the classical Turing machine. It consists of a tape of 8-bit values, with simple increment/decrement, move left/right, input/output, and looping operations. The full language only uses 8 control characters: `+-><.,[]`. + +Imagine if C was designed for computer architectures that run Brainfuck, that is what Mastermind is intended to be. 
+ +## Contents + +- [Brainfuck](#brainfuck) +- [Variables](#variables) +- [Conditionals](#conditionals) +- [Loops](#loops) +- [Functions](#functions) +- [Inline Brainfuck](#in-line-brainfuck) +- [Standard Library](#standard-library) +- [Variants](#variants) +- [Optimisations](#optimisations) + +## Brainfuck + +Brainfuck is an esoteric programming language, originally designed as a theoretical example of a Turing complete language with an extremely minimal compiler. The name is due to its difficulty, it is significantly more difficult to create complex programs than in any popular modern language. + +### Specification + +When a Brainfuck program is run, it operates on an array/tape of cells, performing operations on the tape. Each cell contains an integer, initialised to 0 by default. The program operates on one cell at a time based on the position of a "tape head". Brainfuck supports the following operations: + +- `+`: increment the value of the current cell +- `-`: decrement the value of the current cell +- `>`: move the tape head one cell to the right +- `<`: move the tape head one cell to the left +- `.`: output the current cell as a byte to stdout +- `,`: input a byte from stdin, overwriting the current cell +- `[`: jump to the corresponding `]` if the current cell is 0 +- `]`: jump to the corresponding `[` if the current cell is not 0 + +A Brainfuck program consists of a list of these commands, which are executed sequentially. The program terminates if the final operation in the list is executed. + +### Interpreter Implementation Details + +The Mastermind IDE and compiler library contains an implementation of a Brainfuck interpreter. This implementation is intended to match the behaviour of the most popular Brainfuck implementations: + +#### 8-bit Wrapping Cells + +In this implementation, each cell is an 8-bit integer that wraps if an increment or decrement operation overflows or underflows. + +E.g.
given the current tape cell value is `255`, after an increment (`+`), the cell value is now `0`. + +Similarly: `0`, after a decrement (`-`) becomes `255`. + +#### Infinite Bidirectional Tape + +In this implementation, the tape extends infinitely in both directions. + +## Variables + +### Cells + +The base data type in Mastermind is the `cell`, this corresponds to a single 8-bit cell on the Brainfuck tape. + +``` +cell var = 56; +cell c = 'g'; +cell bool = true; // true/false equivalent to 1/0 +``` + +Cells default to `0`. + +### Input/Output + +The `input` and `output` keywords in Mastermind correspond to the `,` and `.` operators in Brainfuck. `input` simply inputs the next byte from stdin, and `output` outputs a byte to stdout. + +``` +// stdin: 00abc +cell g; +drain 5 { + // do this 5 times + input g; + g += 1; + output g; +} +// stdout: 11bcd +``` + +The simplest way to display text is to output valid ASCII characters. If your Brainfuck implementation supports unicode, that is also possible by outputting multiple bytes. + +``` +output 240; +output 159; +output 164; +output 145; +output 10; +// displays 🤑 (emoji with green cash for tongue) +``` + +### Cell Arrays + +Variables can also be defined as contiguous arrays of cells. + +``` +// multi-cell: +cell[4] array_example = [1, 2, 3, 4]; +cell[5] string_example = "hello"; +cell[2] foo; +foo[0] = 204; +``` + +### Structs + +Structure types can be defined with named fields, then instantiated as variables. + +``` +struct struct_name { + cell x; + cell y; + cell[5] zzzzz; +} + +struct struct_name s; +s.x = 4; +s.y = 123; +s.zzzzz[0] += 3; +s.zzzzz[4] = 180; + +// nested struct: +struct Nested { + struct struct_name n; +} +``` + +### Structs and Arrays + +Any type can be repeated into an array/contiguous allocation. This includes cells, structs, arrays of cells, and arrays of structs.
+ +``` +cell[4][6] mult_arr; // a 6-length array of cell[4] arrays +cell[4][6][2] mult_arr; // 2 * (6-length arrays of cell[4] arrays) + +struct T { + cell a; + cell[4][2] b; +} + +struct T[10] ten_T_structs; +ten_T_structs[4].b[1][3] = 45; + +struct S { + struct T[2][4] matrix_of_T_structs; + cell other; +} + +struct S[3] three_S_structs; +three_S_structs[1].matrix_of_T_structs[3][0] = '5'; +``` + +#### Note: Array indices must be compile-time constant integers + +This is a limitation of Brainfuck, getting around this problem requires more runtime code than is reasonable to include by default, due to the goals of Mastermind. You can implement equivalent behaviour using in-line Brainfuck, structs, and functions. + +### Location specifiers + +The exact memory cells occupied by a variable can be specified: + +``` +// value 1 at tape position 4 +cell a @4 = 1; +// contiguous array of 1s, starting at cell -1 +cell[3] a @-1 = [1, 1, 1]; +``` + +#### Struct subfields + +The byte-order and positioning of a struct's subfields can be specified: + +``` +struct T { + cell a @1; + cell b[2] @3; +} +// struct T's layout: +// (-, a, -, b[0], b[1]) +// '-' denotes an untracked padding cell +``` + +## Conditionals + +Mastermind supports basic `if`/`else` statements. An `if` statement accepts an expression that evaluates to a `cell` type, if the expression is evaluated to be truthy (i.e. not equal to `0`), then the `if` block is executed, otherwise the optional `else` block is executed. This behaviour can be inverted using the `not` keyword. + +``` +if 13 { + output "13"; +} + +if not true { + // unreachable +} + +cell var = 4; +if var { + output "true"; +} else { + output "false"; +} + +// typical equivalence use-case: +if not var - 10 { + // == +} else { + // != +} +``` + +## Loops + +Mastermind currently supports three forms of loops: `while`, `drain` and `copy`. 
+ +It should be noted that there is no early breaking in any of these forms, so all clauses in a loop body are always executed in each iteration. + +### While + +The `while` loop operates similarly to other languages, accepting a condition expression, and a loop body. + +The clauses inside the loop body are executed until the condition is falsy (i.e. equal to `0`). The condition is checked before each iteration. + +Note: currently `while` conditions must be direct variable references, this is subject to future compiler updates. + +``` +cell n = 5; +while n { + // do stuff + n -= 1; +} +// n is now equal to 0 +``` + +### Drain + +The `drain` loop mirrors a very common pattern found in Brainfuck programs: decrementing a cell. `drain` accepts an expression, a list of variables to 'drain into', and/or a loop body. + +If the expression is a direct variable reference, then the variable is decremented after each iteration. If not, it is evaluated in a temporary cell, then decremented after each iteration. + +``` +drain var { + // do stuff +} + +// equivalent to: +while var { + // do stuff + var -= 1; +} +``` + +With expressions: + +``` +drain 6 { + output 'a'; +} +// aaaaaa +``` + +The following example leaves `x` unchanged: + +``` +cell x = 7; +drain x - 2 { + output 'b'; +} +// bbbbb +``` + +#### Into + +If the `into` keyword is used, followed by a whitespace-separated list of target variables, the targets will be incremented after each iteration. + +``` +cell i; +drain 10 into i { + output '0' + i; +} +// 0123456789 + +// equivalent to: +cell i; +cell ten = 10; +while ten { + output '0' + i; + + i += 1; + ten -= 1; +} +``` + +Another example: + +``` +drain var into other_var other_var_2 *spread_array; + +// equivalent to: +drain var { + other_var += 1; + other_var_2 += 1; + spread_array[0] += 1; + spread_array[1] += 1; + spread_array[2] += 1; + // ... 
+} +``` + +### Copy + +The `copy` loop acts similarly to the `drain` loop, however the expression must be a direct variable reference, and it is left unchanged afterwards, and its original value is accessible within the loop body. + +``` +cell y; +copy x into y { + // loop body +}; +``` + +An equivalence example: + +``` +cell var = 5; +copy var { + output '0' + var; +} +// 55555 + +// equivalent to: +cell var = 5; +cell temp = var; +while temp { + output '0' + var; + + temp -= 1; +} +``` + +## Functions + +Mastermind supports a minimal function system: functions can be defined with a name and a fixed number of typed arguments. + +``` +fn newline() { output '\n'; } + +fn print_zeros(cell num) { + copy num { + output '0'; + } + newline(); +} + +// expressions as arguments are currently not supported, +// i.e. print_zeros(9) +cell g = 9; +print_zeros(g); +``` + +Unlike most modern programming languages, functions are not considered first-class values. Functions in Mastermind are in-lined at compile-time, and all arguments are passed by reference. Values can be returned by editing passed in arguments, or editing variables in an outer scope, although the latter makes a function less portable. + +``` +fn is_zero(cell in, cell out) { + out = true; + if in { + out = false; + } +} + +cell value = 'h'; +cell falsy; +is_zero(value, falsy); +``` + +Example showing a function reading a variable from an outer scope: + +``` +fn print_global_g(cell count) { + copy count { + output chr; + } +} + +cell chr = 'g'; +cell count = 3; +print_global_g(count); +// ggg + +{ + // inner scope with a new 'g' allocation + cell chr = 'G'; + count = 5; + print_global_g(count); + // GGGGG +} + +// same call again, now the inner chr has been freed +print_global_g(count); +// ggg +``` + +### Types and Overloads + +Functions support overloads with different types or number of arguments. 
Examples of supported behaviour: + +``` +fn func1() { + output '1'; +} +fn func1(cell a) { + output '2'; +} +fn func1(cell a, cell b) { + output '3'; +} +struct X { cell a; } +fn func1(struct X x) { + output '4'; +} +struct Y { cell a; } +fn func1(struct Y y) { + output '5'; +} +fn func1(cell a, struct X x, struct Y y) { + output '6'; +} +cell n; +struct X x; +struct Y y; +func1(); +func1(n); +func1(n, n); +func1(x); +func1(y); +func1(n, x, y); +// 123456 +``` + +## In-Line Brainfuck + +In-line Brainfuck allows the programmer to define custom behaviour as if writing raw Brainfuck, inspired by in-line assembly in C. + +Basic example: + +``` +// find the next cell that equals -1 +bf { + +[->+]- +} +``` + +More advanced example: + +``` +// input a line of lowercase letters and output the uppercase version +// this is an intentionally inefficient example +bf @3 clobbers var *spread_var etc { + ,----------[++++++++++>,----------] + <[<]> + [ + { + cell g @0; + assert g unknown; + output g + ('A' - 'a'); + // embedded Mastermind! + } + > + ] + // now clear and return + <[[-]<]> +} +``` + +It is the programmer's responsibility to clear used cells and return back to the cell in which they started the in-line Brainfuck context. If the programmer does not do this, any following Mastermind code may break. + +### Memory location specifiers + +The exact location to start an in-line Brainfuck context can be specified: + +``` +cell var @3 = 4; +// compiled: >>>++++ + +bf @4 { + <><><> +} +// compiled: >>>><><><> +``` + +Variables can also be used: + +``` +cell d; +bf @d { + // brainfuck code here +} + +struct G { + cell h; + cell i; + cell j; +} +struct G g; + +bf @g { + // starts on the first cell of g's allocation +} +// equivalent to: +bf @g.h {} +``` + +### Clobbering and Assertions + +With optimisations enabled, Mastermind will try to predict the value of cells at compile-time, so it can prevent unnecessary cell clean-ups and unreachable code. 
If your in-line Brainfuck affects existing Mastermind variables, you should tell the compiler using the `clobbers` keyword, the syntax is similar to the `drain into` target list: + +``` +bf clobbers var *spread_var other_var etc {} +``` + +The compiler will now assume nothing about the values of those variables afterwards. + +If instead you want to tell the compiler specifically that a variable has become a certain value, you can use `assert`: + +``` +assert var equals 3; +// most common use cases: +assert var equals 0; +assert var unknown; +``` + +Asserting a variable as `unknown` is equivalent to clobbering. + +### Embedded Mastermind + +You can embed high-level Mastermind code within a Brainfuck context. During compilation the embedded Mastermind is compiled and the generated Brainfuck is inserted in place. + +``` +// input 3 n-length lines of input +bf { + >+++<,[ + { + cell input_char @0; + assert input_char unknown; + cell length_remaining @1; + assert length_remaining unknown; + + cell next_char @2; + cell next_length_remaining @3; + if not input_char - '\n' { + length_remaining -= 1; + } + if length_remaining { + drain length_remaining into next_length_remaining; + input next_char; + } + } + >>] +} +``` + +Embedded Mastermind can include in-line Brainfuck, this is recursive. For example: + +``` +// top-level Mastermind context +bf { + ++>> + { + // inner Mastermind context + bf { + ++>> + { + // inner inner Mastermind context + bf { + ++>> + { + //... + } + <<-- + } + } + <<-- + } + } + <<-- +} +``` + +The compiler cannot guarantee the global head position at compile time within an in-line Brainfuck context. Therefore memory location specifiers are relative to the current embedded Mastermind context, not the entire program. + +Also, top-level variables are not cleared by default in Mastermind contexts, this allows you to "leave" variables in cells for your Brainfuck to use. 
If you want variables in your embedded Mastermind to be automatically cleared, you can open a scope at the top level: + +``` +bf { + ++----++[][][<><><>] // the program doesn't matter for this example + { + // variables here will not be cleared + cell g @2; + assert g unknown; + { + // variables here will be cleared + let b = 32; + } + } + {{ + // self-cleaning Mastermind code here + }} +} +``` + +## Standard Library + +Currently the Mastermind standard library is very limited, and is effectively a set of example programs included in the web IDE and source repository. + +### Including files + +You can include/import other files using preprocessor directives. The Mastermind preprocessor is intended to mirror the C preprocessor, however it currently only supports the `#include` directive. + +The following is a basic example: + +``` +// file1.mmi +struct H { + cell a; +} +fn print(struct H h) { + output h.a; +} +``` + +``` +// main file being compiled +#include "file1.mmi" + +struct H h; +h.a = 64; +print(h); +// @ +``` + +### Standard Library Examples + +The most mature files in the included examples are the following: + +- `bitops`: bitshifting operations for cell types +- `i8`: signed type for 8-bit integers and supporting functions +- `u8`: common supporting functions for cell types +- `u16`: a 16-bit unsigned integer type and supporting functions +- `ifp16`: a signed 16-bit fixed-point number type and supporting functions + +NOTE: due to current lack of header-guard support, importing multiple of these will likely cause a compiler error, until this is implemented, the best way to work around this is to only include `ifp16` as that includes the others. 
+ +Example usage: + +``` +#include + +// read a 16 bit number from stdin, add 55, then print + +struct u16 n; +read(n); + +cell ff = 55; +add(n, ff); +print(n); +output ' '; +debug(n); // print the binary representation +// example input: 16000 +// output: 16055 0011111010110111 +``` + +Example fixed-point usage: + +``` +#include + +struct ifp16 n; +_99p99609375(n); // constant 99.99609375 +struct ifp16 m; +__1p5(m); // constant -1.5 + +divide(n, m); +print(n); +output ' '; +debug(n); +// -66.66 10111101.01010110 +``` + +## Variants + +The Mastermind compiler can be extended to support Brainfuck variants. + +### Supported Variants: + +#### Classic (1D) Brainfuck + +This is the default behaviour, typical Brainfuck implementation as described in [Brainfuck](#brainfuck). + +#### 2D Brainfuck + +Mastermind currently supports two-dimensional Brainfuck, this is a Brainfuck variant with an additional dimension in the memory array. + +2D Brainfuck support can be enabled in the compiler settings in the web IDE, adding the following features: + +- New opcodes for in-line Brainfuck contexts and in generated Brainfuck code: + - `^`: move up one cell in the grid + - `v`: move down one cell in the grid +- The ability to specify 2D coordinates for location specifiers: + ``` + cell var @(5, -7) = 'a'; + bf @var {[-<<<<<^^^^^^^+>>>>>vvvvvvv]} + bf @(0, 0) {.....} + // aaaaa + ``` +- Three new memory allocation strategies for generated 2D code: + - Zig Zag + - Spiral + - Tiles + + +## Optimisations + +The Mastermind compiler includes optional optimisations for generated code. The original goal of Mastermind was to generate very minimal Brainfuck for use in Code Golf competitions, so most of these are aimed at reducing generated code length. + + + +### Cell Clearing + + + +Optimises clearing cells after they are de-allocated, it does this by tracking their values at compile-time and acting based on a cell's known value. 
For instance, if a cell can be proven at compile-time to have the value `2`, it is more efficient to clear with `--`, than the typical Brainfuck clear: `[-]`. + +### Constants + + + +When large values are added in Brainfuck, the naive approach is to use the increment `+` operator for as many times as needed. The constants optimiser will use multiplication to shorten the code needed to add/subtract large values. Example: the value `46` can be achieved by either `++++++++++++++++++++++++++++++++++++++++++++++` or the shorter: `+++++[>+++++++++<-]>+` (5 \* 9 + 1). + +### Generated Code + + + +Optimises generated Brainfuck code by shortening trivial program segments. + +Currently this is limited to optimising segments of Brainfuck programs with the following operations: `+`, `-`, `>`, `<`, `[-]`. + +``` +--->>><<<++ +// becomes: +- +``` + +An end-to-end example: + +``` +cell h = 4; +cell j = 3; + +h += 10; + +drain 10 { + j = 5; + h += 4; + j += 1; +} + +// compiles to: +++++>+++<++++++++++>>++++++++++[<+<++++>[-]+++++>-] +// after optimisation: +++++++++++++++>+++>++++++++++[-<[-]+++++<++++>>] +``` + +This system finds optimal equivalent segments for classic Brainfuck programs, however for the 2D Brainfuck variant it is not guaranteed, as finding the optimal path between memory cells in a 2D grid is more difficult. The _Generated Code Permutations_ setting enables an exhaustive search for the optimal path when using the 2D Brainfuck variant, otherwise a greedy approach is used. + +### Empty Blocks + + + +Detects if a code block is empty or has no effect on the program, and prunes the associated clause. + +### Unreachable Loops + + + +Brainfuck loops will be omitted if the cell they start on can be proven to be `0` at compile-time. diff --git a/runningMastermind.md b/runningMastermind.md deleted file mode 100644 index 0c24a89..0000000 --- a/runningMastermind.md +++ /dev/null @@ -1,36 +0,0 @@ -# Running Mastermind - -### 1.
Install Rust -Install rust through the following website - https://www.rust-lang.org/tools/install - -This will also install Cargo which is needed to build the project - -### 2. Install Yarn -If not currently installed please install Yarn if unsure follow this guide - https://classic.yarnpkg.com/lang/en/docs/install - -### 3. Install wasm-pack -Install using Yarn, Cargo or the following guide https://rustwasm.github.io/wasm-pack/installer/ - -### 4. Run Yarn Install -Install the Javascript dendencies by running -```bash - yarn install -``` - -### 5. Build the grammar -Build the grammar using the following yarn command -```bash - yarn build:grammar -``` - -### 6. Build Web Assembly Pack -Build Web Assembly Pack using the following yarn command -```bash - yarn build:wasm -``` - -### 7. Run Dev Mode -Run Dev mode using the following command -```bash - yarn dev -``` \ No newline at end of file diff --git a/src/App.css b/src/App.css index c02d1ad..5a0dfbc 100644 --- a/src/App.css +++ b/src/App.css @@ -22,16 +22,18 @@ } .sidebar { - flex: 1; - display: flex; flex-direction: column; align-items: center; justify-content: center; overflow: hidden; + padding-top: 0.5em; + padding-bottom: 0.5em; + padding-left: 0.25em; + padding-right: 0.25em; + gap: 1em; } - .code-panel { position: relative; background-color: var(--BG-2); diff --git a/src/App.tsx b/src/App.tsx index 533790e..bb0fea6 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -567,9 +567,7 @@ const App: Component = () => { -
- -
+ ); diff --git a/src/components/Docs.tsx b/src/components/Docs.tsx deleted file mode 100644 index 08961ef..0000000 --- a/src/components/Docs.tsx +++ /dev/null @@ -1,104 +0,0 @@ -import { Portal } from "solid-js/web"; -import { SolidMarkdown } from "solid-markdown"; -import remarkGfm from "remark-gfm"; -import { IoClose } from "solid-icons/io"; -import { Component, createEffect, createSignal, JSX, Show } from "solid-js"; -import { useAppContext } from "../App"; - -import intro from "../../docs/intro.md?raw"; -import brainfuck from "../../docs/brainfuck.md?raw"; -import variables from "../../docs/variables.md?raw"; -import conditionals from "../../docs/conditionals.md?raw"; -import loops from "../../docs/loops.md?raw"; -import functions from "../../docs/functions.md?raw"; -import inlinebrainfuck from "../../docs/inlinebrainfuck.md?raw"; -import standardlib from "../../docs/standardlib.md?raw"; -import variants from "../../docs/variants.md?raw"; -import optimisations from "../../docs/optimisations.md?raw"; - -import { FaSolidArrowLeftLong, FaSolidArrowRightLong } from "solid-icons/fa"; -const DocsModal: Component<{ style?: JSX.CSSProperties }> = () => { - const app = useAppContext()!; - const docs = { - ["Introduction"]: intro, - ["Brainfuck"]: brainfuck, - ["Variables"]: variables, - ["Conditionals"]: conditionals, - ["Loops"]: loops, - ["Functions"]: functions, - ["Inline Brainfuck"]: inlinebrainfuck, - ["Standard Library"]: standardlib, - ["Brainfuck Variants"]: variants, - ["Optimisations"]: optimisations, - }; - const titles = Object.keys(docs); - const [selected, setSelected] = createSignal(titles[0]); - const [docsContent, setDocsContent] = createSignal( - docs[selected() as keyof typeof docs] ?? "" - ); - createEffect(() => { - setDocsContent(docs[selected() as keyof typeof docs] ?? ""); - }); - - function nextDoc() { - setSelected(titles[(titles.indexOf(selected() ?? 
"") + 1) % titles.length]); - } - function prevDoc() { - setSelected( - titles[ - (titles.indexOf(selected() ?? "") - 1 + titles.length) % titles.length - ] - ); - } - - return ( - - {/* The weirdest solid js feature, puts the component into the top level html body */} - -
app.setDocsOpen(false)} - > -
e.stopPropagation()}> - - - -
- - {docsContent()} - -
- app.setDocsOpen(false)} - /> -
-
-
-
- ); -}; - -export default DocsModal; diff --git a/src/components/Settings.tsx b/src/components/Settings.tsx index ae82d59..54142a5 100644 --- a/src/components/Settings.tsx +++ b/src/components/Settings.tsx @@ -5,25 +5,24 @@ import { useAppContext } from "../App"; // TODO: FIX THIS SO WE DON'T HAVE 2 PERSISTED VALUES ONLY ONE const SettingsModal: Component<{ style?: JSX.CSSProperties }> = () => { + // TODO: refactor this const MemoryAllocationOptions: string[] = [ - "1D Mastermind", - "2D Mastermind - Zig Zag", - "2D Mastermind - Spiral", - "2D Mastermind - Tiles", - //NOT IMPLEMENTED - // "2D Mastermind - Nearest", + "Classic", + "2D Zig Zag", + "2D Spiral", + "2D Tiles", ]; - const tickboxKeys: (keyof OptimisationSettings)[] = [ - "optimise_cell_clearing", - "optimise_constants", - "optimise_empty_blocks", - "optimise_generated_code", - "optimise_generated_all_permutations", - "optimise_memory_allocation", - "optimise_unreachable_loops", - "optimise_variable_usage", - ]; + const tickboxKeys: (keyof OptimisationSettings)[] = [ + "optimise_cell_clearing", + "optimise_constants", + "optimise_empty_blocks", + "optimise_generated_code", + "optimise_generated_all_permutations", + "optimise_unreachable_loops", + // "optimise_memory_allocation", + // "optimise_variable_usage", + ]; const app = useAppContext()!; return ( @@ -34,110 +33,113 @@ const SettingsModal: Component<{ style?: JSX.CSSProperties }> = () => { class="readme-modal-container" onClick={() => app.setSettingsOpen(false)} > -
e.stopPropagation()}> -

SETTINGS

-
- Optimisations: - - app.setConfig((prev) => { - const b = tickboxKeys.some((key) => !prev[key]); - return { - ...prev, - ...Object.fromEntries( - tickboxKeys.map((key) => [key, b]) - ) - } as MastermindConfig; - }) - } - > - (toggle all) - - -
{ - const target = e.target as HTMLInputElement; - app.setConfig((prev) => ({ - ...prev, - [target.name]: !!target.checked, - })); - }} +
e.stopPropagation()}> +

SETTINGS

+
+ + Optimisations: + + app.setConfig((prev) => { + const b = tickboxKeys.some((key) => !prev[key]); + return { + ...prev, + ...Object.fromEntries( + tickboxKeys.map((key) => [key, b]) + ), + } as MastermindConfig; + }) + } > - tickboxKeys.includes(key as keyof OptimisationSettings) - )}> - {([key, enabled]: [string, boolean]) => ( - - )} - - - -
- 2D GENERATION: + (toggle all) +
-
+ { + const target = e.target as HTMLInputElement; + app.setConfig((prev) => ({ + ...prev, + [target.name]: !!target.checked, + })); + }} + > + + tickboxKeys.includes(key as keyof OptimisationSettings) + )} + > + {([key, enabled]: [string, boolean]) => ( - - - -
- + + +
+ 2D GENERATION: +
+
+ + + +
+ app.setSettingsOpen(false)} + /> +
@@ -152,9 +154,9 @@ interface OptimisationSettings { optimise_empty_blocks: boolean; optimise_generated_code: boolean; optimise_generated_all_permutations: boolean; - optimise_memory_allocation: boolean; optimise_unreachable_loops: boolean; - optimise_variable_usage: boolean; + // optimise_memory_allocation: boolean; + // optimise_variable_usage: boolean; } interface TwoDimensionalSettings { @@ -170,22 +172,22 @@ const optimisationLabels: Record = { optimise_cell_clearing: "cell clearing", optimise_constants: "constants", optimise_empty_blocks: "empty blocks", - optimise_generated_code: "generated code", - optimise_generated_all_permutations: "generated code permutations (May slow larger projects)", - optimise_memory_allocation: "memory allocations", optimise_unreachable_loops: "unreachable loops", - optimise_variable_usage: "variable usage", + optimise_generated_code: "generated code", + optimise_generated_all_permutations: "generated code permutations", + // optimise_memory_allocation: "memory allocations", + // optimise_variable_usage: "variable usage", }; export const DEFAULT_MASTERMIND_CONFIG = { - optimise_cell_clearing: false, - optimise_constants: false, - optimise_empty_blocks: false, - optimise_generated_code: false, - optimise_generated_all_permutations: false, - optimise_memory_allocation: false, - optimise_unreachable_loops: false, - optimise_variable_usage: false, - memory_allocation_method: 0, - enable_2d_grid: false, - }; + optimise_cell_clearing: false, + optimise_constants: false, + optimise_empty_blocks: false, + optimise_generated_code: false, + optimise_generated_all_permutations: false, + optimise_unreachable_loops: false, + // optimise_memory_allocation: false, + // optimise_variable_usage: false, + memory_allocation_method: 0, + enable_2d_grid: false, +}; diff --git a/src/panels/CompilerPanel.tsx b/src/panels/CompilerPanel.tsx index 9c4341d..932efdd 100644 --- a/src/panels/CompilerPanel.tsx +++ b/src/panels/CompilerPanel.tsx @@ -34,10 +34,6 
@@ const CompilerPanel: Component<{ style?: JSX.CSSProperties }> = (props) => { await app.compile(entryFileId, app.config()); }; - createEffect(() => { - console.log(app.fileStates); - }); - return (
@@ -48,7 +44,7 @@ const CompilerPanel: Component<{ style?: JSX.CSSProperties }> = (props) => {